class UserTask(DataTask):
	"""Task that runs a user-supplied executable (configured via TaskExecutableWrapper)."""
	alias = ['UserMod']
	configSections = DataTask.configSections + ['UserMod', 'UserTask']

	def __init__(self, config, name):
		DataTask.__init__(self, config, name)
		self._exeWrap = TaskExecutableWrapper(config)

	def getCommand(self):
		"""Shell command line - user executable with stdout/stderr captured in job.* files."""
		return '(%s) > job.stdout 2> job.stderr' % self._exeWrap.getCommand()

	def getJobArguments(self, jobNum):
		"""Base task arguments followed by the user executable's arguments."""
		base_args = DataTask.getJobArguments(self, jobNum)
		return base_args + ' ' + self._exeWrap.getArguments()

	def getSBInFiles(self):
		"""Input sandbox - base task files plus the executable wrapper's files."""
		return DataTask.getSBInFiles(self) + self._exeWrap.getSBInFiles()

	def getSBOutFiles(self):
		"""Output sandbox - captured stdout/stderr (gzipped when gzip output is enabled)."""
		suffix = utils.QM(self.gzipOut, '.gz', '')
		log_files = [base + suffix for base in ['job.stdout', 'job.stderr']]
		return DataTask.getSBOutFiles(self) + log_files
class UserTask(DataTask):
	"""Task that runs a user-supplied executable (configured via TaskExecutableWrapper)."""
	configSections = DataTask.configSections + ['UserTask']

	def __init__(self, config, name):
		DataTask.__init__(self, config, name)
		self._exeWrap = TaskExecutableWrapper(config)

	def getCommand(self):
		"""Shell command line - user executable with stdout/stderr captured in job.* files."""
		return '(%s) > job.stdout 2> job.stderr' % self._exeWrap.getCommand()

	def getJobArguments(self, jobNum):
		"""Base task arguments followed by the user executable's arguments."""
		return DataTask.getJobArguments(self, jobNum) + ' ' + self._exeWrap.getArguments()

	def getSBInFiles(self):
		"""Input sandbox - base task files plus the executable wrapper's files."""
		return DataTask.getSBInFiles(self) + self._exeWrap.getSBInFiles()

	def getSBOutFiles(self):
		"""Output sandbox - captured stdout/stderr (gzipped when gzip output is enabled)."""
		# FIX: 'map' returns an iterator on Python 3, so concatenating it to the
		# list from DataTask.getSBOutFiles() would raise TypeError - build a list.
		tmp = [s + utils.QM(self.gzipOut, '.gz', '') for s in ['job.stdout', 'job.stderr']]
		return DataTask.getSBOutFiles(self) + tmp
class CMSSW(SCRAMTask):
	"""CMSSW analysis task based on a SCRAM project area.

	Configures dataset handling defaults, optionally packs the project area
	into a tarball, and instruments cmsRun config files with grid-control
	placeholders (@VAR@ / __VAR__) needed for dataset processing.
	"""
	configSections = SCRAMTask.configSections + ['CMSSW']

	def __init__(self, config, name):
		# Defaults for dataset handling - set before base class init reads them
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor ' +
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor')
		dash_config = config.changeView(viewClass = 'SimpleConfigView', setSections = ['dashboard'])
		dash_config.set('application', 'cmsRun')
		SCRAMTask.__init__(self, config, name)

		if self._scramProject != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		# RELEASETOP of the project area at creation time (None without project area)
		self._oldReleaseTop = None
		if self._projectArea:
			self._oldReleaseTop = self._parse_scram_file(os.path.join(self._projectArea, '.SCRAM', self._scramArch, 'Environment')).get('RELEASETOP', None)

		self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))

		self._projectAreaTarballSE = config.getBool(['se runtime', 'se project area'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existance
		# Check that for dataset jobs the necessary placeholders are in the config file
		if self._dataSplitter is None:
			self.eventsPerJob = config.get('events per job', '0') # this can be a variable like @USER_EVENTS@!
		fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
			autoPrepare = config.getBool('instrumentation', True),
			mustPrepare = (self._dataSplitter is not None))

		# Create project area tarball
		if self._projectArea and not os.path.exists(self._projectAreaTarball):
			config.setState(True, 'init', detail = 'sandbox')
		# Information about search order for software environment
		self.searchLoc = self._getCMSSWPaths(config)
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					return
			# Generate CMSSW tarball
			if self._projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self._projectArea, self._projectAreaPattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')

	def _getCMSSWPaths(self, config):
		"""Collect (env var, path) pairs describing where jobs may find CMSSW locally."""
		result = []
		userPath = config.get(['cmssw dir', 'vo software dir'], '')
		if userPath:
			# Prefer the resolved local path when it exists on this machine
			userPathLocal = os.path.abspath(utils.cleanPath(userPath))
			if os.path.exists(userPathLocal):
				userPath = userPathLocal
		if userPath:
			result.append(('CMSSW_DIR_USER', userPath))
		if self._oldReleaseTop:
			# RELEASETOP points four levels below the software installation root
			projPath = os.path.normpath('%s/../../../../' % self._oldReleaseTop)
			result.append(('CMSSW_DIR_PRO', projPath))
		log = logging.getLogger('user')
		log.info('Local jobs will try to use the CMSSW software located here:')
		for i, loc in enumerate(result):
			log.info(' %i) %s', i + 1, loc[1])
		if result:
			log.info('')
		return result

	def _getConfigFiles(self, config):
		"""Yield configured cmsRun config file paths; raise if one is missing.

		With an active prolog/epilog no config file is required (default []).
		"""
		cfgDefault = utils.QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			if not os.path.exists(cfgFile):
				raise ConfigError('Config file %r not found.' % cfgFile)
			yield cfgFile

	def _cfgIsInstrumented(self, fn):
		"""Return True if the config file contains every needed placeholder (__VAR__ or @VAR@)."""
		fp = open(fn, 'r')
		try:
			cfg = fp.read()
		finally:
			fp.close()
		for tag in self.neededVars():
			if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
				return False
		return True

	def _cfgStore(self, source, target, fragment_path = None):
		"""Copy config file 'source' to 'target', appending the instrumentation fragment if given."""
		fp = open(source, 'r')
		try:
			content = fp.read()
		finally:
			fp.close()
		fp = open(target, 'w')
		try:
			fp.write(content)
			if fragment_path:
				logging.getLogger('user').info('Instrumenting... %s', os.path.basename(source))
				fragment_fp = open(fragment_path, 'r')
				fp.write(fragment_fp.read())
				fragment_fp.close()
		finally:
			fp.close()

	def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare, mustPrepare):
		"""Determine which config files still need copying/instrumentation.

		Returns a list of (source, work dir target, needs instrumentation) tuples
		and prints a status table of all inspected config files.
		"""
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))
		cfgTodo = []
		cfgStatus = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			if cfg_new_exists:
				# Work dir copy exists - only redo it if instrumentation is required
				isInstrumented = self._cfgIsInstrumented(cfg_new)
				doCopy = False
			else:
				isInstrumented = self._cfgIsInstrumented(cfg)
				doCopy = True
			doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
			doCopy = doCopy or doPrepare
			if doCopy:
				cfgTodo.append((cfg, cfg_new, doPrepare))
			cfgStatus.append({1: cfg.split(comPath, 1)[1].lstrip('/'), 2: cfg_new_exists,
				3: isInstrumented, 4: doPrepare})
		if cfgStatus:
			utils.printTabular([(1, 'Config file'), (2, 'Work dir'), (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus, 'lccc')
		return cfgTodo

	def _processConfigFiles(self, config, cfgFiles, fragment_path, autoPrepare, mustPrepare):
		"""Copy config files into the work dir, instrument them if needed and verify the result.

		Returns the list of work dir config file paths; raises ConfigError if a
		required file is missing or lacks the needed placeholders.
		"""
		# process list of uninitialized config files
		for (cfg, cfg_new, doPrepare) in self._cfgFindUninitialized(config, cfgFiles, autoPrepare, mustPrepare):
			if doPrepare and (autoPrepare or utils.getUserBool('Do you want to prepare %s for running over the dataset?' % cfg, True)):
				self._cfgStore(cfg, cfg_new, fragment_path)
			else:
				self._cfgStore(cfg, cfg_new)
		result = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			if not os.path.exists(cfg_new):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfg)
			isInstrumented = self._cfgIsInstrumented(cfg_new)
			if mustPrepare and not isInstrumented:
				raise ConfigError('Config file %r must use %s to work properly!' %
					(cfg, str.join(', ', imap(lambda x: '@%s@' % x, self.neededVars()))))
			if autoPrepare and not isInstrumented:
				self._log.warning('Config file %r was not instrumented!', cfg)
			result.append(cfg_new)
		return result

	def neededVars(self):
		"""Placeholder names that instrumented config files must reference."""
		if self._dataSplitter:
			return self._partProcessor.getNeededKeys(self._dataSplitter) or []
		return ['MAX_EVENTS']

	# Get environment variables for gc_config.sh
	def getTaskConfig(self):
		data = SCRAMTask.getTaskConfig(self)
		data.update(dict(self.searchLoc))
		data['GZIP_OUT'] = utils.QM(self.gzipOut, 'yes', 'no')
		data['SE_RUNTIME'] = utils.QM(self._projectAreaTarballSE, 'yes', 'no')
		data['HAS_RUNTIME'] = utils.QM(self._projectArea, 'yes', 'no')
		data['CMSSW_EXEC'] = 'cmsRun'
		data['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self.configFiles))
		data['CMSSW_OLD_RELEASETOP'] = self._oldReleaseTop
		if self.prolog.isActive():
			data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
			data['CMSSW_PROLOG_SB_IN_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.prolog.getSBInFiles()))
			data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
		if self.epilog.isActive():
			data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
			data['CMSSW_EPILOG_SB_IN_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.epilog.getSBInFiles()))
			data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
		return data

	# Get files to be transfered via SE (description, source, target)
	def getSEInFiles(self):
		files = SCRAMTask.getSEInFiles(self)
		if self._projectArea and self._projectAreaTarballSE:
			return files + [('CMSSW tarball', self._projectAreaTarball, self.taskID + '.tar.gz')]
		return files

	# Get files for input sandbox
	def getSBInFiles(self):
		files = SCRAMTask.getSBInFiles(self) + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
		for cfgFile in self.configFiles:
			files.append(utils.Result(pathAbs = cfgFile, pathRel = os.path.basename(cfgFile)))
		if self._projectArea and not self._projectAreaTarballSE:
			# Project area tarball travels in the sandbox when not shipped via SE
			files.append(utils.Result(pathAbs = self._projectAreaTarball, pathRel = os.path.basename(self._projectAreaTarball)))
		return files + [utils.Result(pathAbs = utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'), pathRel = 'gc-run.cmssw.sh')]

	# Get files for output sandbox
	def getSBOutFiles(self):
		if not self.configFiles:
			return SCRAMTask.getSBOutFiles(self)
		return SCRAMTask.getSBOutFiles(self) + utils.QM(self.gzipOut, ['cmssw.log.gz'], []) + ['cmssw.dbs.tar.gz']

	def getCommand(self):
		return './gc-run.cmssw.sh $@'

	def getJobArguments(self, jobNum):
		return SCRAMTask.getJobArguments(self, jobNum) + ' ' + self.arguments

	def getVarNames(self):
		result = SCRAMTask.getVarNames(self)
		if self._dataSplitter is None:
			result.append('MAX_EVENTS')
		return result

	# Get job dependent environment variables
	def getJobConfig(self, jobNum):
		data = SCRAMTask.getJobConfig(self, jobNum)
		if self._dataSplitter is None:
			data['MAX_EVENTS'] = self.eventsPerJob
		return data

	def getDescription(self, jobNum): # (task name, job name, type)
		result = SCRAMTask.getDescription(self, jobNum)
		if not result.jobType:
			result.jobType = 'analysis'
		return result
class CMSSW(DataTask):
	"""CMSSW analysis task (legacy revision) based on a SCRAM project area or project name.

	Reads SCRAM environment data, optionally packs the project area into a
	runtime tarball and instruments cmsRun config files with the placeholders
	(@VAR@ / __VAR__) required for dataset processing and lumi filtering.
	"""
	getConfigSections = DataTask.createFunction_getConfigSections(['CMSSW'])

	def __init__(self, config, name):
		config.set('se input timeout', '0:30', override = False)
		config.set('dataset provider', 'DBS3Provider', override = False)
		config.set('dataset splitter', 'EventBoundarySplitter', override = False)
		DataTask.__init__(self, config, name)
		# NOTE(review): updateErrorDict's return value is passed to dict() here -
		# if it returns None this raises; later revisions call it for its side effect only. Verify.
		self.errorDict.update(dict(self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))))

		# SCRAM info
		scramProject = config.getList('scram project', [])
		if len(scramProject):
			self.projectArea = config.getPath('project area', '')
			if len(self.projectArea):
				raise ConfigError('Cannot specify both SCRAM project and project area')
			if len(scramProject) != 2:
				raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
		else:
			self.projectArea = config.getPath('project area')

		# This works in tandem with provider_dbsv2.py !
		self.selectedLumis = parseLumiFilter(config.get('lumi filter', ''))

		self.useReqs = config.getBool('software requirements', True, onChange = None)
		self.seRuntime = config.getBool('se runtime', False)
		self.runtimePath = config.getWorkPath('runtime.tar.gz')

		if len(self.projectArea):
			defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
			self.pattern = config.getList('area files', defaultPattern.split())

			if os.path.exists(self.projectArea):
				utils.vprint('Project area found in: %s' % self.projectArea, -1)
			else:
				raise ConfigError('Specified config area %r does not exist!' % self.projectArea)

			scramPath = os.path.join(self.projectArea, '.SCRAM')
			# try to open it
			try:
				fp = open(os.path.join(scramPath, 'Environment'), 'r')
				self.scramEnv = utils.DictFormat().parse(fp, keyParser = {None: str})
			# FIX: bare 'except:' also swallowed KeyboardInterrupt/SystemExit
			except Exception:
				raise ConfigError('Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)

			for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
				if key not in self.scramEnv:
					raise ConfigError('Installed program in project area not recognized.')

			# FIX: wrap filter() in list() - on Python 3 'filter object + list' raises TypeError
			archs = list(filter(lambda x: os.path.isdir(os.path.join(scramPath, x)) and not x.startswith('.'), os.listdir(scramPath)))
			self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
			try:
				fp = open(os.path.join(scramPath, self.scramArch, 'Environment'), 'r')
				self.scramEnv.update(utils.DictFormat().parse(fp, keyParser = {None: str}))
			# FIX: bare 'except:' also swallowed KeyboardInterrupt/SystemExit
			except Exception:
				raise ConfigError('Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch)
		else:
			self.scramEnv = {
				'SCRAM_PROJECTNAME': scramProject[0],
				'SCRAM_PROJECTVERSION': scramProject[1]
			}
			self.scramArch = config.get('scram arch')
		self.scramVersion = config.get('scram version', 'scramv1')

		if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
			raise ConfigError('Project area not a valid CMSSW project area.')

		# Information about search order for software environment
		self.searchLoc = []
		if config.getState('sandbox'):
			userPath = config.get('cmssw dir', '')
			if userPath != '':
				self.searchLoc.append(('CMSSW_DIR_USER', userPath))
			if self.scramEnv.get('RELEASETOP', None):
				# RELEASETOP points four levels below the software installation root
				projPath = os.path.normpath('%s/../../../../' % self.scramEnv['RELEASETOP'])
				self.searchLoc.append(('CMSSW_DIR_PRO', projPath))
		if len(self.searchLoc):
			utils.vprint('Local jobs will try to use the CMSSW software located here:', -1)
			for i, loc in enumerate(self.searchLoc):
				key, value = loc
				utils.vprint(' %i) %s' % (i + 1, value), -1)

		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existance
		self.configFiles = []
		cfgDefault = QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			newPath = config.getWorkPath(os.path.basename(cfgFile))
			if not os.path.exists(newPath):
				if not os.path.exists(cfgFile):
					raise ConfigError('Config file %r not found.' % cfgFile)
				shutil.copyfile(cfgFile, newPath)
			self.configFiles.append(newPath)

		# Check that for dataset jobs the necessary placeholders are in the config file
		self.prepare = config.getBool('prepare config', False)
		fragment = config.getPath('instrumentation fragment', os.path.join('packages', 'grid_control_cms', 'share', 'fragmentForCMSSW.py'))
		# FIX: use 'is not None' / 'is None' for None comparisons (PEP 8)
		if self.dataSplitter is not None:
			if config.getState('sandbox'):
				if len(self.configFiles) > 0:
					self.instrumentCfgQueue(self.configFiles, fragment, mustPrepare = True)
		else:
			self.eventsPerJob = config.get('events per job', '0')
			if config.getState(detail = 'sandbox') and self.prepare:
				self.instrumentCfgQueue(self.configFiles, fragment)
		if not os.path.exists(config.getWorkPath('runtime.tar.gz')):
			config.setState(True, detail = 'sandbox')
		if config.getState(detail = 'sandbox'):
			if os.path.exists(config.getWorkPath('runtime.tar.gz')):
				if not utils.getUserBool('Runtime already exists! Do you want to regenerate CMSSW tarball?', True):
					return
			# Generate runtime tarball (and move to SE)
			if self.projectArea:
				utils.genTarball(config.getWorkPath('runtime.tar.gz'), utils.matchFiles(self.projectArea, self.pattern))
			if self.seRuntime:
				config.setState(True, detail = 'storage')

	def initDataProcessor(self):
		"""Data processor handling CMS specific split information."""
		return CMSDataSplitProcessor(self.checkSE)

	def instrumentCfgQueue(self, cfgFiles, fragment, mustPrepare = False):
		"""Append the instrumentation fragment to config files missing needed placeholders.

		Prints a status table, asks the user (unless 'prepare config' is set) and
		raises ConfigError if instrumentation is mandatory but no file qualifies.
		"""
		def isInstrumented(cfgName):
			# True if the file references every needed placeholder (__VAR__ or @VAR@)
			cfg = open(cfgName, 'r').read()
			for tag in self.neededVars():
				if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
					return False
			return True

		def doInstrument(cfgName):
			if not isInstrumented(cfgName) or 'customise_for_gc' not in open(cfgName, 'r').read():
				utils.vprint('Instrumenting...', os.path.basename(cfgName), -1)
				open(cfgName, 'a').write(open(fragment, 'r').read())
			else:
				utils.vprint('%s already contains customise_for_gc and all needed variables' % os.path.basename(cfgName), -1)

		cfgStatus = []
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))
		for cfg in cfgFiles:
			cfgStatus.append({0: cfg.split(comPath, 1)[1].lstrip('/'), 1: str(isInstrumented(cfg)), 2: cfg})
		utils.printTabular([(0, 'Config file'), (1, 'Instrumented')], cfgStatus, 'lc')

		for cfg in cfgFiles:
			if self.prepare or not isInstrumented(cfg):
				if self.prepare or utils.getUserBool('Do you want to prepare %s for running over the dataset?' % cfg, True):
					doInstrument(cfg)
		if mustPrepare and not (True in map(isInstrumented, cfgFiles)):
			raise ConfigError('A config file must use %s to work properly!' %
				str.join(', ', map(lambda x: '@%s@' % x, self.neededVars())))

	# Lumi filter need
	def neededVars(self):
		"""Placeholder names that instrumented config files must reference."""
		result = []
		varMap = {
			DataSplitter.NEntries: 'MAX_EVENTS',
			DataSplitter.Skipped: 'SKIP_EVENTS',
			DataSplitter.FileList: 'FILE_NAMES'
		}
		if self.dataSplitter:
			result.extend(map(lambda x: varMap[x], self.dataSplitter.neededVars()))
		if self.selectedLumis:
			result.append('LUMI_RANGE')
		return result

	# Called on job submission
	def getSubmitInfo(self, jobNum):
		result = DataTask.getSubmitInfo(self, jobNum)
		result.update({'application': self.scramEnv['SCRAM_PROJECTVERSION'], 'exe': 'cmsRun'})
		if self.dataSplitter is None:
			result.update({'nevtJob': self.eventsPerJob})
		return result

	# Get environment variables for gc_config.sh
	def getTaskConfig(self):
		data = DataTask.getTaskConfig(self)
		data.update(dict(self.searchLoc))
		data['CMSSW_OLD_RELEASETOP'] = self.scramEnv.get('RELEASETOP', None)
		data['DB_EXEC'] = 'cmsRun'
		data['SCRAM_ARCH'] = self.scramArch
		data['SCRAM_VERSION'] = self.scramVersion
		data['SCRAM_PROJECTVERSION'] = self.scramEnv['SCRAM_PROJECTVERSION']
		data['GZIP_OUT'] = QM(self.gzipOut, 'yes', 'no')
		data['SE_RUNTIME'] = QM(self.seRuntime, 'yes', 'no')
		data['HAS_RUNTIME'] = QM(len(self.projectArea), 'yes', 'no')
		data['CMSSW_CONFIG'] = str.join(' ', map(os.path.basename, self.configFiles))
		if self.prolog.isActive():
			data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
			data['CMSSW_PROLOG_SB_In_FILES'] = str.join(' ', self.prolog.getSBInFiles())
			data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
		if self.epilog.isActive():
			data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
			data['CMSSW_EPILOG_SB_In_FILES'] = str.join(' ', self.epilog.getSBInFiles())
			data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
		return data

	# Get job requirements
	def getRequirements(self, jobNum):
		reqs = DataTask.getRequirements(self, jobNum)
		if self.useReqs:
			reqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self.scramEnv['SCRAM_PROJECTVERSION']))
			reqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self.scramArch))
		return reqs

	# Get files to be transfered via SE (description, source, target)
	def getSEInFiles(self):
		files = DataTask.getSEInFiles(self)
		if len(self.projectArea) and self.seRuntime:
			return files + [('CMSSW runtime', self.runtimePath, self.taskID + '.tar.gz')]
		return files

	# Get files for input sandbox
	def getSBInFiles(self):
		files = DataTask.getSBInFiles(self) + self.configFiles + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
		if len(self.projectArea) and not self.seRuntime:
			files.append(self.runtimePath)
		return files + [utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms')]

	# Get files for output sandbox
	def getSBOutFiles(self):
		return DataTask.getSBOutFiles(self) + QM(self.gzipOut, ['cmssw.log.gz'], []) + ['cmssw.dbs.tar.gz']

	def getCommand(self):
		return './gc-run.cmssw.sh $@'

	def getJobArguments(self, jobNum):
		return DataTask.getJobArguments(self, jobNum) + ' ' + self.arguments

	def getActiveLumiFilter(self, lumifilter, jobNum = None):
		"""Format the lumi filter as a comma separated list of quoted lumi ranges."""
		getLR = lambda x: str.join(',', map(lambda x: '"%s"' % x, formatLumi(x)))
		# NOTE(review): the early return below deliberately disables the per-job
		# subset selection that follows (marked TODO in the original) - the code
		# after it is unreachable. Kept as-is to preserve behavior.
		return getLR(lumifilter) # TODO: Validate subset selection
		try:
			splitInfo = self.dataSplitter.getSplitInfo(jobNum)
			runTag = splitInfo[DataSplitter.MetadataHeader].index("Runs")
			runList = utils.listMapReduce(lambda m: m[runTag], splitInfo[DataSplitter.Metadata])
			return getLR(filterLumiFilter(runList, lumifilter))
		# FIX: bare 'except:' also swallowed KeyboardInterrupt/SystemExit
		except Exception:
			return getLR(lumifilter)

	def getVarNames(self):
		result = DataTask.getVarNames(self)
		if self.dataSplitter is None:
			result.append('MAX_EVENTS')
		if self.selectedLumis:
			result.append('LUMI_RANGE')
		return result

	# Get job dependent environment variables
	def getJobConfig(self, jobNum):
		data = DataTask.getJobConfig(self, jobNum)
		if self.dataSplitter is None:
			data['MAX_EVENTS'] = self.eventsPerJob
		if self.selectedLumis:
			data['LUMI_RANGE'] = self.getActiveLumiFilter(self.selectedLumis)
		return data

	def getDescription(self, jobNum): # (task name, job name, type)
		(taskName, jobName, jobType) = DataTask.getDescription(self, jobNum)
		return (taskName, jobName, QM(jobType, jobType, 'analysis'))

	def getDependencies(self):
		return DataTask.getDependencies(self) + ['cmssw']
class CMSSW(DataTask):
	"""CMSSW analysis task based on a SCRAM project area or project name.

	Configures dataset handling defaults, optionally packs the project area
	into a tarball and instruments cmsRun config files with grid-control
	placeholders (@VAR@ / __VAR__) needed for dataset processing.
	"""
	configSections = DataTask.configSections + ['CMSSW']

	def __init__(self, config, name):
		# Defaults for dataset handling - set before base class init reads them
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('partition processor', 'CMSPartitionProcessor LocationPartitionProcessor LumiPartitionProcessor')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		DataTask.__init__(self, config, name)
		self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))

		# SCRAM settings
		self._configureSCRAMSettings(config)

		self.useReqs = config.getBool('software requirements', True, onChange = None)
		self._projectAreaTarballSE = config.getBool(['se project area', 'se runtime'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		# Information about search order for software environment
		self.searchLoc = self._getCMSSWPaths(config)
		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existance
		# Check that for dataset jobs the necessary placeholders are in the config file
		if self.dataSplitter is None:
			self.eventsPerJob = config.get('events per job', '0')
		fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
			autoPrepare = config.getBool('instrumentation', True),
			mustPrepare = (self.dataSplitter is not None))

		# Create project area tarball
		if not os.path.exists(self._projectAreaTarball):
			config.setState(True, 'init', detail = 'sandbox')
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					return
			# Generate CMSSW tarball
			if self.projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self.projectArea, self.pattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')

	def _configureSCRAMSettings(self, config):
		"""Read SCRAM project/area configuration and fill self.scramEnv / scramArch / scramVersion.

		Either 'scram project' (PROJECT VERSION pair) or 'project area' must be
		given; raises ConfigError on conflicting or unparsable settings.
		"""
		scramProject = config.getList('scram project', [])
		if len(scramProject):
			self.projectArea = config.getPath('project area', '')
			if len(self.projectArea):
				raise ConfigError('Cannot specify both SCRAM project and project area')
			if len(scramProject) != 2:
				raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
		else:
			self.projectArea = config.getPath('project area')

		if len(self.projectArea):
			defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
			self.pattern = config.getList('area files', defaultPattern.split())

			if os.path.exists(self.projectArea):
				utils.vprint('Project area found in: %s' % self.projectArea, -1)
			else:
				raise ConfigError('Specified config area %r does not exist!' % self.projectArea)

			scramPath = os.path.join(self.projectArea, '.SCRAM')
			# try to open it
			try:
				fp = open(os.path.join(scramPath, 'Environment'), 'r')
				self.scramEnv = utils.DictFormat().parse(fp, keyParser = {None: str})
			except Exception:
				raise ConfigError('Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)

			for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
				if key not in self.scramEnv:
					raise ConfigError('Installed program in project area not recognized.')

			# Architecture subdirectories of .SCRAM; first one is the default arch
			archs = lfilter(lambda x: os.path.isdir(os.path.join(scramPath, x)) and not x.startswith('.'), os.listdir(scramPath))
			self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
			try:
				fp = open(os.path.join(scramPath, self.scramArch, 'Environment'), 'r')
				self.scramEnv.update(utils.DictFormat().parse(fp, keyParser = {None: str}))
			except Exception:
				raise ConfigError('Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch)
		else:
			self.scramEnv = {
				'SCRAM_PROJECTNAME': scramProject[0],
				'SCRAM_PROJECTVERSION': scramProject[1]
			}
			self.scramArch = config.get('scram arch')
		self.scramVersion = config.get('scram version', 'scramv1')

		if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

	def _getCMSSWPaths(self, config):
		"""Collect (env var, path) pairs describing where jobs may find CMSSW locally."""
		result = []
		if config.getState('init', detail = 'sandbox'):
			userPath = config.get('cmssw dir', '')
			if userPath != '':
				result.append(('CMSSW_DIR_USER', userPath))
			if self.scramEnv.get('RELEASETOP', None):
				# RELEASETOP points four levels below the software installation root
				projPath = os.path.normpath('%s/../../../../' % self.scramEnv['RELEASETOP'])
				result.append(('CMSSW_DIR_PRO', projPath))
		if result:
			utils.vprint('Local jobs will try to use the CMSSW software located here:', -1)
			for i, loc in enumerate(result):
				utils.vprint(' %i) %s' % (i + 1, loc[1]), -1)
		return result

	def _getConfigFiles(self, config):
		"""Yield configured cmsRun config file paths; raise if one is missing.

		With an active prolog/epilog no config file is required (default []).
		"""
		cfgDefault = utils.QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			if not os.path.exists(cfgFile):
				raise ConfigError('Config file %r not found.' % cfgFile)
			yield cfgFile

	def _cfgIsInstrumented(self, fn):
		"""Return True if the config file contains every needed placeholder (__VAR__ or @VAR@)."""
		fp = open(fn, 'r')
		try:
			cfg = fp.read()
		finally:
			fp.close()
		for tag in self.neededVars():
			if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
				return False
		return True

	def _cfgStore(self, source, target, fragment_path = None):
		"""Copy config file 'source' to 'target', appending the instrumentation fragment if given."""
		fp = open(source, 'r')
		try:
			content = fp.read()
		finally:
			fp.close()
		fp = open(target, 'w')
		try:
			fp.write(content)
			if fragment_path:
				logging.getLogger('user').info('Instrumenting... %s', os.path.basename(source))
				fragment_fp = open(fragment_path, 'r')
				fp.write(fragment_fp.read())
				fragment_fp.close()
		finally:
			fp.close()

	def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare, mustPrepare):
		"""Determine which config files still need copying/instrumentation.

		Returns a list of (source, work dir target, needs instrumentation) tuples
		and prints a status table of all inspected config files.
		"""
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))
		cfgTodo = []
		cfgStatus = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			if cfg_new_exists:
				# Work dir copy exists - only redo it if instrumentation is required
				isInstrumented = self._cfgIsInstrumented(cfg_new)
				doCopy = False
			else:
				isInstrumented = self._cfgIsInstrumented(cfg)
				doCopy = True
			doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
			doCopy = doCopy or doPrepare
			if doCopy:
				cfgTodo.append((cfg, cfg_new, doPrepare))
			cfgStatus.append({1: cfg.split(comPath, 1)[1].lstrip('/'), 2: cfg_new_exists,
				3: isInstrumented, 4: doPrepare})
		utils.vprint('', -1)
		utils.printTabular([(1, 'Config file'), (2, 'Work dir'), (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus, 'lccc')
		utils.vprint('', -1)
		return cfgTodo

	def _processConfigFiles(self, config, cfgFiles, fragment_path, autoPrepare, mustPrepare):
		"""Copy config files into the work dir, instrument them if needed and verify the result.

		Returns the list of work dir config file paths; raises ConfigError if a
		required file is missing or lacks the needed placeholders.
		"""
		# process list of uninitialized config files
		for (cfg, cfg_new, doPrepare) in self._cfgFindUninitialized(config, cfgFiles, autoPrepare, mustPrepare):
			if doPrepare and (autoPrepare or utils.getUserBool('Do you want to prepare %s for running over the dataset?' % cfg, True)):
				self._cfgStore(cfg, cfg_new, fragment_path)
			else:
				self._cfgStore(cfg, cfg_new)
		result = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			if not os.path.exists(cfg_new):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfg)
			isInstrumented = self._cfgIsInstrumented(cfg_new)
			if mustPrepare and not isInstrumented:
				raise ConfigError('Config file %r must use %s to work properly!' %
					(cfg, str.join(', ', imap(lambda x: '@%s@' % x, self.neededVars()))))
			if autoPrepare and not isInstrumented:
				self._log.warning('Config file %r was not instrumented!', cfg)
			result.append(cfg_new)
		return result

	def neededVars(self):
		"""Placeholder names that instrumented config files must reference."""
		if self.dataSplitter:
			return self._dataPS.getNeededDataKeys()
		return []

	# Called on job submission
	def getSubmitInfo(self, jobNum):
		result = DataTask.getSubmitInfo(self, jobNum)
		result.update({'application': self.scramEnv['SCRAM_PROJECTVERSION'], 'exe': 'cmsRun'})
		if self.dataSplitter is None:
			result.update({'nevtJob': self.eventsPerJob})
		return result

	# Get environment variables for gc_config.sh
	def getTaskConfig(self):
		data = DataTask.getTaskConfig(self)
		data.update(dict(self.searchLoc))
		data['CMSSW_OLD_RELEASETOP'] = self.scramEnv.get('RELEASETOP', None)
		data['DB_EXEC'] = 'cmsRun'
		data['SCRAM_ARCH'] = self.scramArch
		data['SCRAM_VERSION'] = self.scramVersion
		data['SCRAM_PROJECTVERSION'] = self.scramEnv['SCRAM_PROJECTVERSION']
		data['GZIP_OUT'] = utils.QM(self.gzipOut, 'yes', 'no')
		data['SE_RUNTIME'] = utils.QM(self._projectAreaTarballSE, 'yes', 'no')
		data['HAS_RUNTIME'] = utils.QM(len(self.projectArea), 'yes', 'no')
		data['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self.configFiles))
		if self.prolog.isActive():
			data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
			data['CMSSW_PROLOG_SB_In_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.prolog.getSBInFiles()))
			data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
		if self.epilog.isActive():
			data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
			data['CMSSW_EPILOG_SB_In_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.epilog.getSBInFiles()))
			data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
		return data

	# Get job requirements
	def getRequirements(self, jobNum):
		reqs = DataTask.getRequirements(self, jobNum)
		if self.useReqs:
			reqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self.scramArch))
		return reqs

	# Get files to be transfered via SE (description, source, target)
	def getSEInFiles(self):
		files = DataTask.getSEInFiles(self)
		if len(self.projectArea) and self._projectAreaTarballSE:
			return files + [('CMSSW tarball', self._projectAreaTarball, self.taskID + '.tar.gz')]
		return files

	# Get files for input sandbox
	def getSBInFiles(self):
		files = DataTask.getSBInFiles(self) + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
		for cfgFile in self.configFiles:
			files.append(utils.Result(pathAbs = cfgFile, pathRel = os.path.basename(cfgFile)))
		if len(self.projectArea) and not self._projectAreaTarballSE:
			# Project area tarball travels in the sandbox when not shipped via SE
			files.append(utils.Result(pathAbs = self._projectAreaTarball, pathRel = os.path.basename(self._projectAreaTarball)))
		return files + [utils.Result(pathAbs = utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'), pathRel = 'gc-run.cmssw.sh')]

	# Get files for output sandbox
	def getSBOutFiles(self):
		return DataTask.getSBOutFiles(self) + utils.QM(self.gzipOut, ['cmssw.log.gz'], []) + ['cmssw.dbs.tar.gz']

	def getCommand(self):
		return './gc-run.cmssw.sh $@'

	def getJobArguments(self, jobNum):
		return DataTask.getJobArguments(self, jobNum) + ' ' + self.arguments

	def getVarNames(self):
		result = DataTask.getVarNames(self)
		if self.dataSplitter is None:
			result.append('MAX_EVENTS')
		return result

	# Get job dependent environment variables
	def getJobConfig(self, jobNum):
		data = DataTask.getJobConfig(self, jobNum)
		if self.dataSplitter is None:
			data['MAX_EVENTS'] = self.eventsPerJob
		return data

	def getDescription(self, jobNum): # (task name, job name, type)
		result = DataTask.getDescription(self, jobNum)
		if not result.jobType:
			result.jobType = 'analysis'
		return result

	def getDependencies(self):
		return DataTask.getDependencies(self) + ['cmssw']
class CMSSW(DataTask):
	"""Task for running cmsRun (CMSSW) jobs.

	Handles SCRAM project / project area setup, instrumentation of CMSSW
	config files with grid-control placeholders, and creation/distribution
	of the project area tarball (via sandbox or SE).
	"""
	configSections = DataTask.configSections + ['CMSSW']

	def __init__(self, config, name):
		# CMS specific defaults for dataset handling
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('partition processor', 'CMSPartitionProcessor LocationPartitionProcessor LumiPartitionProcessor')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		DataTask.__init__(self, config, name)
		self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))

		# SCRAM settings
		self._configureSCRAMSettings(config)

		self.useReqs = config.getBool('software requirements', True, onChange = None)
		self._projectAreaTarballSE = config.getBool(['se project area', 'se runtime'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		# Information about search order for software environment
		self.searchLoc = self._getCMSSWPaths(config)
		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existance
		# Check that for dataset jobs the necessary placeholders are in the config file
		if self.dataSplitter is None:
			self.eventsPerJob = config.get('events per job', '0')
		fragment = config.getPath('instrumentation fragment',
			utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
			autoPrepare = config.getBool('instrumentation', True),
			mustPrepare = (self.dataSplitter is not None))

		# Create project area tarball
		if not os.path.exists(self._projectAreaTarball):
			config.setState(True, 'init', detail = 'sandbox')
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					return
			# Generate CMSSW tarball
			if self.projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self.projectArea, self.pattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')

	def _readScramEnv(self, path, errorMsg):
		"""Parse a SCRAM 'Environment' file into a dict.

		Raises ConfigError with errorMsg if the file cannot be opened or parsed.
		Fix over previous version: the file handle is now closed deterministically
		via a context manager instead of being leaked.
		"""
		try:
			with open(path, 'r') as fp:
				return utils.DictFormat().parse(fp, keyParser = {None: str})
		except Exception:
			raise ConfigError(errorMsg)

	def _configureSCRAMSettings(self, config):
		"""Determine SCRAM project/arch either from config options or from a local project area."""
		scramProject = config.getList('scram project', [])
		if len(scramProject):
			self.projectArea = config.getPath('project area', '')
			if len(self.projectArea):
				raise ConfigError('Cannot specify both SCRAM project and project area')
			if len(scramProject) != 2:
				raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
		else:
			self.projectArea = config.getPath('project area')

		if len(self.projectArea):
			defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
			self.pattern = config.getList('area files', defaultPattern.split())
			if os.path.exists(self.projectArea):
				utils.vprint('Project area found in: %s' % self.projectArea, -1)
			else:
				raise ConfigError('Specified config area %r does not exist!' % self.projectArea)

			# Read the SCRAM environment of the project area
			scramPath = os.path.join(self.projectArea, '.SCRAM')
			self.scramEnv = self._readScramEnv(os.path.join(scramPath, 'Environment'),
				'Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)
			for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
				if key not in self.scramEnv:
					raise ConfigError('Installed program in project area not recognized.')

			# Default scram arch: first architecture directory found below .SCRAM
			archs = lfilter(lambda x: os.path.isdir(os.path.join(scramPath, x)) and not x.startswith('.'),
				os.listdir(scramPath))
			self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
			self.scramEnv.update(self._readScramEnv(os.path.join(scramPath, self.scramArch, 'Environment'),
				'Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch))
		else:
			# No project area: take project name/version directly from the config
			self.scramEnv = {
				'SCRAM_PROJECTNAME': scramProject[0],
				'SCRAM_PROJECTVERSION': scramProject[1]
			}
			self.scramArch = config.get('scram arch')

		self.scramVersion = config.get('scram version', 'scramv1')
		if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

	def _getCMSSWPaths(self, config):
		"""Collect (env-var, path) pairs describing where local jobs look for CMSSW software."""
		result = []
		if config.getState('init', detail = 'sandbox'):
			userPath = config.get('cmssw dir', '')
			if userPath != '':
				result.append(('CMSSW_DIR_USER', userPath))
			if self.scramEnv.get('RELEASETOP', None):
				projPath = os.path.normpath('%s/../../../../' % self.scramEnv['RELEASETOP'])
				result.append(('CMSSW_DIR_PRO', projPath))
		if result:
			utils.vprint('Local jobs will try to use the CMSSW software located here:', -1)
			for i, loc in enumerate(result):
				utils.vprint(' %i) %s' % (i + 1, loc[1]), -1)
		return result

	def _getConfigFiles(self, config):
		"""Yield configured CMSSW config file paths, raising ConfigError for missing files.

		A config file is only optional when a prolog or epilog executable is active.
		"""
		cfgDefault = utils.QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			if not os.path.exists(cfgFile):
				raise ConfigError('Config file %r not found.' % cfgFile)
			yield cfgFile

	def _cfgIsInstrumented(self, fn):
		"""Return True if the config file contains every needed placeholder (__TAG__ or @TAG@)."""
		with open(fn, 'r') as fp:
			cfg = fp.read()
		for tag in self.neededVars():
			if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
				return False
		return True

	def _cfgStore(self, source, target, fragment_path = None):
		"""Copy config file source -> target; append the instrumentation fragment if given.

		Fix over previous version: all file handles are managed by 'with' blocks, so
		the fragment file is no longer leaked if a read/write raises.
		"""
		with open(source, 'r') as fp:
			content = fp.read()
		with open(target, 'w') as fp:
			fp.write(content)
			if fragment_path:
				logging.getLogger('user').info('Instrumenting... %s', os.path.basename(source))
				with open(fragment_path, 'r') as fragment_fp:
					fp.write(fragment_fp.read())

	def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare, mustPrepare):
		"""Determine which config files still need copying/instrumentation.

		Prints a status table and returns a list of (source, workdir-target, doPrepare).
		"""
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))
		cfgTodo = []
		cfgStatus = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			# Prefer the already-copied work dir version when checking instrumentation
			if cfg_new_exists:
				isInstrumented = self._cfgIsInstrumented(cfg_new)
				doCopy = False
			else:
				isInstrumented = self._cfgIsInstrumented(cfg)
				doCopy = True
			doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
			doCopy = doCopy or doPrepare
			if doCopy:
				cfgTodo.append((cfg, cfg_new, doPrepare))
			cfgStatus.append({1: cfg.split(comPath, 1)[1].lstrip('/'), 2: cfg_new_exists,
				3: isInstrumented, 4: doPrepare})

		utils.vprint('', -1)
		utils.printTabular([(1, 'Config file'), (2, 'Work dir'), (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus, 'lccc')
		utils.vprint('', -1)
		return cfgTodo

	def _processConfigFiles(self, config, cfgFiles, fragment_path, autoPrepare, mustPrepare):
		"""Copy (and optionally instrument) config files into the work dir; return new paths.

		Raises ConfigError if a file was not copied or lacks mandatory placeholders.
		"""
		# process list of uninitialized config files
		for (cfg, cfg_new, doPrepare) in self._cfgFindUninitialized(config, cfgFiles, autoPrepare, mustPrepare):
			if doPrepare and (autoPrepare or utils.getUserBool(
					'Do you want to prepare %s for running over the dataset?' % cfg, True)):
				self._cfgStore(cfg, cfg_new, fragment_path)
			else:
				self._cfgStore(cfg, cfg_new)

		result = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			if not os.path.exists(cfg_new):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfg)
			isInstrumented = self._cfgIsInstrumented(cfg_new)
			if mustPrepare and not isInstrumented:
				raise ConfigError('Config file %r must use %s to work properly!' %
					(cfg, str.join(', ', imap(lambda x: '@%s@' % x, self.neededVars()))))
			if autoPrepare and not isInstrumented:
				self._log.warning('Config file %r was not instrumented!', cfg)
			result.append(cfg_new)
		return result

	def neededVars(self):
		"""Return the data keys that must appear as placeholders in instrumented configs."""
		if self.dataSplitter:
			return self._dataPS.getNeededDataKeys()
		return []

	# Called on job submission
	def getSubmitInfo(self, jobNum):
		"""Add CMSSW version (and events per job, when applicable) to submit info."""
		result = DataTask.getSubmitInfo(self, jobNum)
		result.update({'application': self.scramEnv['SCRAM_PROJECTVERSION'], 'exe': 'cmsRun'})
		if self.dataSplitter is None:
			result.update({'nevtJob': self.eventsPerJob})
		return result

	# Get environment variables for gc_config.sh
	def getTaskConfig(self):
		"""Assemble task-level environment variables for the job wrapper script."""
		data = DataTask.getTaskConfig(self)
		data.update(dict(self.searchLoc))
		data['CMSSW_OLD_RELEASETOP'] = self.scramEnv.get('RELEASETOP', None)
		data['DB_EXEC'] = 'cmsRun'
		data['SCRAM_ARCH'] = self.scramArch
		data['SCRAM_VERSION'] = self.scramVersion
		data['SCRAM_PROJECTVERSION'] = self.scramEnv['SCRAM_PROJECTVERSION']
		data['GZIP_OUT'] = utils.QM(self.gzipOut, 'yes', 'no')
		data['SE_RUNTIME'] = utils.QM(self._projectAreaTarballSE, 'yes', 'no')
		data['HAS_RUNTIME'] = utils.QM(len(self.projectArea), 'yes', 'no')
		data['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self.configFiles))
		if self.prolog.isActive():
			data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
			data['CMSSW_PROLOG_SB_In_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.prolog.getSBInFiles()))
			data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
		if self.epilog.isActive():
			data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
			data['CMSSW_EPILOG_SB_In_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.epilog.getSBInFiles()))
			data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
		return data

	# Get job requirements
	def getRequirements(self, jobNum):
		"""Extend the base job requirements with the CMS software tag for the scram arch."""
		reqs = DataTask.getRequirements(self, jobNum)
		if self.useReqs:
			reqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self.scramArch))
		return reqs

	# Get files to be transfered via SE (description, source, target)
	def getSEInFiles(self):
		"""Return SE input files; adds the project area tarball when it is shipped via SE."""
		files = DataTask.getSEInFiles(self)
		if len(self.projectArea) and self._projectAreaTarballSE:
			return files + [('CMSSW tarball', self._projectAreaTarball, self.taskID + '.tar.gz')]
		return files

	# Get files for input sandbox
	def getSBInFiles(self):
		"""Collect sandbox input files: base task + prolog/epilog files, the CMSSW
		config files, the project area tarball (only when not shipped via SE) and
		the gc-run.cmssw.sh wrapper script."""
		files = DataTask.getSBInFiles(self) + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
		for cfgFile in self.configFiles:
			files.append(utils.Result(pathAbs = cfgFile, pathRel = os.path.basename(cfgFile)))
		if len(self.projectArea) and not self._projectAreaTarballSE:
			files.append(utils.Result(pathAbs = self._projectAreaTarball,
				pathRel = os.path.basename(self._projectAreaTarball)))
		return files + [utils.Result(pathAbs = utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'),
			pathRel = 'gc-run.cmssw.sh')]

	# Get files for output sandbox
	def getSBOutFiles(self):
		"""Return sandbox output files; cmssw.log.gz is only listed when output is gzipped."""
		return DataTask.getSBOutFiles(self) + utils.QM(self.gzipOut, ['cmssw.log.gz'], []) + ['cmssw.dbs.tar.gz']

	def getCommand(self):
		"""Job payload command line: the CMSSW wrapper script with pass-through args."""
		return './gc-run.cmssw.sh $@'

	def getJobArguments(self, jobNum):
		"""Append the user-configured argument string to the base job arguments."""
		return DataTask.getJobArguments(self, jobNum) + ' ' + self.arguments

	def getVarNames(self):
		"""Expose MAX_EVENTS as a task variable when no dataset splitter is configured."""
		result = DataTask.getVarNames(self)
		if self.dataSplitter is None:
			result.append('MAX_EVENTS')
		return result

	# Get job dependent environment variables
	def getJobConfig(self, jobNum):
		"""Set MAX_EVENTS (from 'events per job') for jobs without a dataset splitter."""
		data = DataTask.getJobConfig(self, jobNum)
		if self.dataSplitter is None:
			data['MAX_EVENTS'] = self.eventsPerJob
		return data

	def getDescription(self, jobNum):  # (task name, job name, type)
		"""Default the job type to 'analysis' when the base class leaves it unset."""
		result = DataTask.getDescription(self, jobNum)
		if not result.jobType:
			result.jobType = 'analysis'
		return result

	def getDependencies(self):
		"""Declare the 'cmssw' runtime dependency in addition to the base dependencies."""
		return DataTask.getDependencies(self) + ['cmssw']