Example #1
class UserTask(DataTask):
    alias = ['UserMod']
    configSections = DataTask.configSections + ['UserMod', 'UserTask']

    def __init__(self, config, name):
        DataTask.__init__(self, config, name)
        self._exeWrap = TaskExecutableWrapper(config)

    def getCommand(self):
        return '(%s) > job.stdout 2> job.stderr' % self._exeWrap.getCommand()

    def getJobArguments(self, jobNum):
        return DataTask.getJobArguments(
            self, jobNum) + ' ' + self._exeWrap.getArguments()

    def getSBInFiles(self):
        return DataTask.getSBInFiles(self) + self._exeWrap.getSBInFiles()

    def getSBOutFiles(self):
        tmp = lmap(lambda s: s + utils.QM(self.gzipOut, '.gz', ''),
                   ['job.stdout', 'job.stderr'])
        return DataTask.getSBOutFiles(self) + tmp
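The two output names above are built with grid-control's helpers: utils.QM(cond, a, b) is a ternary-style helper and lmap is a list-returning map. A minimal standalone sketch of that naming logic (the function name is illustrative, not part of grid-control):

def _stdout_stderr_names(gzip_out):
    # Mirrors getSBOutFiles above: append '.gz' when job output is gzipped.
    suffix = '.gz' if gzip_out else ''
    return [name + suffix for name in ['job.stdout', 'job.stderr']]

assert _stdout_stderr_names(True) == ['job.stdout.gz', 'job.stderr.gz']
assert _stdout_stderr_names(False) == ['job.stdout', 'job.stderr']
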
Example #2
class UserTask(DataTask):
	configSections = DataTask.configSections + ['UserTask']

	def __init__(self, config, name):
		DataTask.__init__(self, config, name)
		self._exeWrap = TaskExecutableWrapper(config)


	def getCommand(self):
		return '(%s) > job.stdout 2> job.stderr' % self._exeWrap.getCommand()


	def getJobArguments(self, jobNum):
		return DataTask.getJobArguments(self, jobNum) + ' ' + self._exeWrap.getArguments()


	def getSBInFiles(self):
		return DataTask.getSBInFiles(self) + self._exeWrap.getSBInFiles()


	def getSBOutFiles(self):
		tmp = map(lambda s: s + utils.QM(self.gzipOut, '.gz', ''), ['job.stdout', 'job.stderr'])
		return DataTask.getSBOutFiles(self) + tmp
Example #3
class CMSSW(SCRAMTask):
	configSections = SCRAMTask.configSections + ['CMSSW']

	def __init__(self, config, name):
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor ' +
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor')
		dash_config = config.changeView(viewClass = 'SimpleConfigView', setSections = ['dashboard'])
		dash_config.set('application', 'cmsRun')
		SCRAMTask.__init__(self, config, name)
		if self._scramProject != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		self._oldReleaseTop = None
		if self._projectArea:
			self._oldReleaseTop = self._parse_scram_file(os.path.join(self._projectArea, '.SCRAM', self._scramArch, 'Environment')).get('RELEASETOP', None)

		self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))

		self._projectAreaTarballSE = config.getBool(['se runtime', 'se project area'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		if self._dataSplitter is None:
			self.eventsPerJob = config.get('events per job', '0') # this can be a variable like @USER_EVENTS@!
		fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
			autoPrepare = config.getBool('instrumentation', True),
			mustPrepare = (self._dataSplitter is not None))

		# Create project area tarball
		if self._projectArea and not os.path.exists(self._projectAreaTarball):
			config.setState(True, 'init', detail = 'sandbox')
		# Information about search order for software environment
		self.searchLoc = self._getCMSSWPaths(config)
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					return
			# Generate CMSSW tarball
			if self._projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self._projectArea, self._projectAreaPattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')


	def _getCMSSWPaths(self, config):
		result = []
		userPath = config.get(['cmssw dir', 'vo software dir'], '')
		if userPath:
			userPathLocal = os.path.abspath(utils.cleanPath(userPath))
			if os.path.exists(userPathLocal):
				userPath = userPathLocal
		if userPath:
			result.append(('CMSSW_DIR_USER', userPath))
		if self._oldReleaseTop:
			projPath = os.path.normpath('%s/../../../../' % self._oldReleaseTop)
			result.append(('CMSSW_DIR_PRO', projPath))
		log = logging.getLogger('user')
		log.info('Local jobs will try to use the CMSSW software located here:')
		for i, loc in enumerate(result):
			log.info(' %i) %s', i + 1, loc[1])
		if result:
			log.info('')
		return result


	def _getConfigFiles(self, config):
		cfgDefault = utils.QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			if not os.path.exists(cfgFile):
				raise ConfigError('Config file %r not found.' % cfgFile)
			yield cfgFile


	def _cfgIsInstrumented(self, fn):
		fp = open(fn, 'r')
		try:
			cfg = fp.read()
		finally:
			fp.close()
		for tag in self.neededVars():
			if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
				return False
		return True


	def _cfgStore(self, source, target, fragment_path = None):
		fp = open(source, 'r')
		try:
			content = fp.read()
		finally:
			fp.close()
		fp = open(target, 'w')
		try:
			fp.write(content)
			if fragment_path:
				logging.getLogger('user').info('Instrumenting... %s', os.path.basename(source))
				fragment_fp = open(fragment_path, 'r')
				fp.write(fragment_fp.read())
				fragment_fp.close()
		finally:
			fp.close()


	def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare, mustPrepare):
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))

		cfgTodo = []
		cfgStatus = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			if cfg_new_exists:
				isInstrumented = self._cfgIsInstrumented(cfg_new)
				doCopy = False
			else:
				isInstrumented = self._cfgIsInstrumented(cfg)
				doCopy = True
			doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
			doCopy = doCopy or doPrepare
			if doCopy:
				cfgTodo.append((cfg, cfg_new, doPrepare))
			cfgStatus.append({1: cfg.split(comPath, 1)[1].lstrip('/'), 2: cfg_new_exists,
				3: isInstrumented, 4: doPrepare})

		if cfgStatus:
			utils.printTabular([(1, 'Config file'), (2, 'Work dir'), (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus, 'lccc')
		return cfgTodo


	def _processConfigFiles(self, config, cfgFiles, fragment_path, autoPrepare, mustPrepare):
		# process list of uninitialized config files
		for (cfg, cfg_new, doPrepare) in self._cfgFindUninitialized(config, cfgFiles, autoPrepare, mustPrepare):
			if doPrepare and (autoPrepare or utils.getUserBool('Do you want to prepare %s for running over the dataset?' % cfg, True)):
				self._cfgStore(cfg, cfg_new, fragment_path)
			else:
				self._cfgStore(cfg, cfg_new)

		result = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			if not os.path.exists(cfg_new):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfg)
			isInstrumented = self._cfgIsInstrumented(cfg_new)
			if mustPrepare and not isInstrumented:
				raise ConfigError('Config file %r must use %s to work properly!' %
					(cfg, str.join(', ', imap(lambda x: '@%s@' % x, self.neededVars()))))
			if autoPrepare and not isInstrumented:
				self._log.warning('Config file %r was not instrumented!', cfg)
			result.append(cfg_new)
		return result


	def neededVars(self):
		if self._dataSplitter:
			return self._partProcessor.getNeededKeys(self._dataSplitter) or []
		return ['MAX_EVENTS']


	# Get environment variables for gc_config.sh
	def getTaskConfig(self):
		data = SCRAMTask.getTaskConfig(self)
		data.update(dict(self.searchLoc))
		data['GZIP_OUT'] = utils.QM(self.gzipOut, 'yes', 'no')
		data['SE_RUNTIME'] = utils.QM(self._projectAreaTarballSE, 'yes', 'no')
		data['HAS_RUNTIME'] = utils.QM(self._projectArea, 'yes', 'no')
		data['CMSSW_EXEC'] = 'cmsRun'
		data['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self.configFiles))
		data['CMSSW_OLD_RELEASETOP'] = self._oldReleaseTop
		if self.prolog.isActive():
			data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
			data['CMSSW_PROLOG_SB_IN_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.prolog.getSBInFiles()))
			data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
		if self.epilog.isActive():
			data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
			data['CMSSW_EPILOG_SB_IN_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.epilog.getSBInFiles()))
			data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
		return data


	# Get files to be transferred via SE (description, source, target)
	def getSEInFiles(self):
		files = SCRAMTask.getSEInFiles(self)
		if self._projectArea and self._projectAreaTarballSE:
			return files + [('CMSSW tarball', self._projectAreaTarball, self.taskID + '.tar.gz')]
		return files


	# Get files for input sandbox
	def getSBInFiles(self):
		files = SCRAMTask.getSBInFiles(self) + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
		for cfgFile in self.configFiles:
			files.append(utils.Result(pathAbs = cfgFile, pathRel = os.path.basename(cfgFile)))
		if self._projectArea and not self._projectAreaTarballSE:
			files.append(utils.Result(pathAbs = self._projectAreaTarball, pathRel = os.path.basename(self._projectAreaTarball)))
		return files + [utils.Result(pathAbs = utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'), pathRel = 'gc-run.cmssw.sh')]


	# Get files for output sandbox
	def getSBOutFiles(self):
		if not self.configFiles:
			return SCRAMTask.getSBOutFiles(self)
		return SCRAMTask.getSBOutFiles(self) + utils.QM(self.gzipOut, ['cmssw.log.gz'], []) + ['cmssw.dbs.tar.gz']


	def getCommand(self):
		return './gc-run.cmssw.sh $@'


	def getJobArguments(self, jobNum):
		return SCRAMTask.getJobArguments(self, jobNum) + ' ' + self.arguments


	def getVarNames(self):
		result = SCRAMTask.getVarNames(self)
		if self._dataSplitter is None:
			result.append('MAX_EVENTS')
		return result


	# Get job dependent environment variables
	def getJobConfig(self, jobNum):
		data = SCRAMTask.getJobConfig(self, jobNum)
		if self._dataSplitter is None:
			data['MAX_EVENTS'] = self.eventsPerJob
		return data


	def getDescription(self, jobNum): # (task name, job name, type)
		result = SCRAMTask.getDescription(self, jobNum)
		if not result.jobType:
			result.jobType = 'analysis'
		return result
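The instrumentation check in _cfgIsInstrumented above only verifies that every needed placeholder appears in the config file, either in __TAG__ or in @TAG@ form. A standalone sketch of that check (function name and sample strings are illustrative):

def _contains_placeholders(cfg_text, needed_vars):
	# Mirrors _cfgIsInstrumented above: every needed variable must appear
	# either as '__TAG__' or as '@TAG@' somewhere in the config text.
	for tag in needed_vars:
		if ('__%s__' % tag not in cfg_text) and ('@%s@' % tag not in cfg_text):
			return False
	return True

assert _contains_placeholders('process.source.skipEvents = @SKIP_EVENTS@', ['SKIP_EVENTS'])
assert not _contains_placeholders('# nothing here', ['MAX_EVENTS'])
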
Example #4
class CMSSW(DataTask):
	getConfigSections = DataTask.createFunction_getConfigSections(['CMSSW'])

	def __init__(self, config, name):
		config.set('se input timeout', '0:30', override = False)
		config.set('dataset provider', 'DBS3Provider', override = False)
		config.set('dataset splitter', 'EventBoundarySplitter', override = False)
		DataTask.__init__(self, config, name)
		self.errorDict.update(dict(self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))))

		# SCRAM info
		scramProject = config.getList('scram project', [])
		if len(scramProject):
			self.projectArea = config.getPath('project area', '')
			if len(self.projectArea):
				raise ConfigError('Cannot specify both SCRAM project and project area')
			if len(scramProject) != 2:
				raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
		else:
			self.projectArea = config.getPath('project area')

		# This works in tandem with provider_dbsv2.py !
		self.selectedLumis = parseLumiFilter(config.get('lumi filter', ''))

		self.useReqs = config.getBool('software requirements', True, onChange = None)
		self.seRuntime = config.getBool('se runtime', False)
		self.runtimePath = config.getWorkPath('runtime.tar.gz')

		if len(self.projectArea):
			defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
			self.pattern = config.getList('area files', defaultPattern.split())

			if os.path.exists(self.projectArea):
				utils.vprint('Project area found in: %s' % self.projectArea, -1)
			else:
				raise ConfigError('Specified config area %r does not exist!' % self.projectArea)

			scramPath = os.path.join(self.projectArea, '.SCRAM')
			# try to open it
			try:
				fp = open(os.path.join(scramPath, 'Environment'), 'r')
				self.scramEnv = utils.DictFormat().parse(fp, keyParser = {None: str})
			except Exception:
				raise ConfigError('Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)

			for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
				if key not in self.scramEnv:
					raise ConfigError('Installed program in project area not recognized.')

			archs = filter(lambda x: os.path.isdir(os.path.join(scramPath, x)) and not x.startswith('.'), os.listdir(scramPath))
			self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
			try:
				fp = open(os.path.join(scramPath, self.scramArch, 'Environment'), 'r')
				self.scramEnv.update(utils.DictFormat().parse(fp, keyParser = {None: str}))
			except Exception:
				raise ConfigError('Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch)
		else:
			self.scramEnv = {
				'SCRAM_PROJECTNAME': scramProject[0],
				'SCRAM_PROJECTVERSION': scramProject[1]
			}
			self.scramArch = config.get('scram arch')

		self.scramVersion = config.get('scram version', 'scramv1')
		if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
			raise ConfigError('Project area not a valid CMSSW project area.')

		# Information about search order for software environment
		self.searchLoc = []
		if config.getState('sandbox'):
			userPath = config.get('cmssw dir', '')
			if userPath != '':
				self.searchLoc.append(('CMSSW_DIR_USER', userPath))
			if self.scramEnv.get('RELEASETOP', None):
				projPath = os.path.normpath('%s/../../../../' % self.scramEnv['RELEASETOP'])
				self.searchLoc.append(('CMSSW_DIR_PRO', projPath))
		if len(self.searchLoc):
			utils.vprint('Local jobs will try to use the CMSSW software located here:', -1)
			for i, loc in enumerate(self.searchLoc):
				key, value = loc
				utils.vprint(' %i) %s' % (i + 1, value), -1)

		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		self.configFiles = []
		cfgDefault = QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			newPath = config.getWorkPath(os.path.basename(cfgFile))
			if not os.path.exists(newPath):
				if not os.path.exists(cfgFile):
					raise ConfigError('Config file %r not found.' % cfgFile)
				shutil.copyfile(cfgFile, newPath)
			self.configFiles.append(newPath)

		# Check that for dataset jobs the necessary placeholders are in the config file
		self.prepare = config.getBool('prepare config', False)
		fragment = config.getPath('instrumentation fragment',
			os.path.join('packages', 'grid_control_cms', 'share', 'fragmentForCMSSW.py'))
		if self.dataSplitter is not None:
			if config.getState('sandbox'):
				if len(self.configFiles) > 0:
					self.instrumentCfgQueue(self.configFiles, fragment, mustPrepare = True)
		else:
			self.eventsPerJob = config.get('events per job', '0')
			if config.getState(detail = 'sandbox') and self.prepare:
				self.instrumentCfgQueue(self.configFiles, fragment)
		if not os.path.exists(config.getWorkPath('runtime.tar.gz')):
			config.setState(True, detail = 'sandbox')
		if config.getState(detail = 'sandbox'):
			if os.path.exists(config.getWorkPath('runtime.tar.gz')):
				if not utils.getUserBool('Runtime already exists! Do you want to regenerate the CMSSW tarball?', True):
					return
			# Generate runtime tarball (and move to SE)
			if self.projectArea:
				utils.genTarball(config.getWorkPath('runtime.tar.gz'), utils.matchFiles(self.projectArea, self.pattern))
			if self.seRuntime:
				config.setState(True, detail = 'storage')


	def initDataProcessor(self):
		return CMSDataSplitProcessor(self.checkSE)


	def instrumentCfgQueue(self, cfgFiles, fragment, mustPrepare = False):
		def isInstrumented(cfgName):
			cfg = open(cfgName, 'r').read()
			for tag in self.neededVars():
				if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
					return False
			return True
		def doInstrument(cfgName):
			if not isInstrumented(cfgName) or 'customise_for_gc' not in open(cfgName, 'r').read():
				utils.vprint('Instrumenting...', os.path.basename(cfgName), -1)
				open(cfgName, 'a').write(open(fragment, 'r').read())
			else:
				utils.vprint('%s already contains customise_for_gc and all needed variables' % os.path.basename(cfgName), -1)

		cfgStatus = []
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))
		for cfg in cfgFiles:
			cfgStatus.append({0: cfg.split(comPath, 1)[1].lstrip('/'), 1: str(isInstrumented(cfg)), 2: cfg})
		utils.printTabular([(0, 'Config file'), (1, 'Instrumented')], cfgStatus, 'lc')

		for cfg in cfgFiles:
			if self.prepare or not isInstrumented(cfg):
				if self.prepare or utils.getUserBool('Do you want to prepare %s for running over the dataset?' % cfg, True):
					doInstrument(cfg)
		if mustPrepare and not (True in map(isInstrumented, cfgFiles)):
			raise ConfigError('A config file must use %s to work properly!' %
				str.join(', ', map(lambda x: '@%s@' % x, self.neededVars())))


	# Placeholders needed in the config file - the lumi filter adds LUMI_RANGE
	def neededVars(self):
		result = []
		varMap = {
			DataSplitter.NEntries: 'MAX_EVENTS',
			DataSplitter.Skipped: 'SKIP_EVENTS',
			DataSplitter.FileList: 'FILE_NAMES'
		}
		if self.dataSplitter:
			result.extend(map(lambda x: varMap[x], self.dataSplitter.neededVars()))
		if self.selectedLumis:
			result.append('LUMI_RANGE')
		return result


	# Called on job submission
	def getSubmitInfo(self, jobNum):
		result = DataTask.getSubmitInfo(self, jobNum)
		result.update({'application': self.scramEnv['SCRAM_PROJECTVERSION'], 'exe': 'cmsRun'})
		if self.dataSplitter is None:
			result.update({'nevtJob': self.eventsPerJob})
		return result


	# Get environment variables for gc_config.sh
	def getTaskConfig(self):
		data = DataTask.getTaskConfig(self)
		data.update(dict(self.searchLoc))
		data['CMSSW_OLD_RELEASETOP'] = self.scramEnv.get('RELEASETOP', None)
		data['DB_EXEC'] = 'cmsRun'
		data['SCRAM_ARCH'] = self.scramArch
		data['SCRAM_VERSION'] = self.scramVersion
		data['SCRAM_PROJECTVERSION'] = self.scramEnv['SCRAM_PROJECTVERSION']
		data['GZIP_OUT'] = QM(self.gzipOut, 'yes', 'no')
		data['SE_RUNTIME'] = QM(self.seRuntime, 'yes', 'no')
		data['HAS_RUNTIME'] = QM(len(self.projectArea), 'yes', 'no')
		data['CMSSW_CONFIG'] = str.join(' ', map(os.path.basename, self.configFiles))
		if self.prolog.isActive():
			data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
			data['CMSSW_PROLOG_SB_In_FILES'] = str.join(' ', self.prolog.getSBInFiles())
			data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
		if self.epilog.isActive():
			data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
			data['CMSSW_EPILOG_SB_In_FILES'] = str.join(' ', self.epilog.getSBInFiles())
			data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
		return data


	# Get job requirements
	def getRequirements(self, jobNum):
		reqs = DataTask.getRequirements(self, jobNum)
		if self.useReqs:
			reqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self.scramEnv['SCRAM_PROJECTVERSION']))
			reqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self.scramArch))
		return reqs


	# Get files to be transferred via SE (description, source, target)
	def getSEInFiles(self):
		files = DataTask.getSEInFiles(self)
		if len(self.projectArea) and self.seRuntime:
			return files + [('CMSSW runtime', self.runtimePath, self.taskID + '.tar.gz')]
		return files


	# Get files for input sandbox
	def getSBInFiles(self):
		files = DataTask.getSBInFiles(self) + self.configFiles + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
		if len(self.projectArea) and not self.seRuntime:
			files.append(self.runtimePath)
		return files + [utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms')]


	# Get files for output sandbox
	def getSBOutFiles(self):
		return DataTask.getSBOutFiles(self) + QM(self.gzipOut, ['cmssw.log.gz'], []) + ['cmssw.dbs.tar.gz']


	def getCommand(self):
		return './gc-run.cmssw.sh $@'


	def getJobArguments(self, jobNum):
		return DataTask.getJobArguments(self, jobNum) + ' ' + self.arguments


	def getActiveLumiFilter(self, lumifilter, jobNum = None):
		getLR = lambda x: str.join(',', map(lambda x: '"%s"' % x, formatLumi(x)))
		return getLR(lumifilter) # TODO: Validate subset selection - the per-job run filtering below stays disabled until then
		try:
			splitInfo = self.dataSplitter.getSplitInfo(jobNum)
			runTag = splitInfo[DataSplitter.MetadataHeader].index("Runs")
			runList = utils.listMapReduce(lambda m: m[runTag], splitInfo[DataSplitter.Metadata])
			return getLR(filterLumiFilter(runList, lumifilter))
		except Exception:
			return getLR(lumifilter)


	def getVarNames(self):
		result = DataTask.getVarNames(self)
		if self.dataSplitter is None:
			result.append('MAX_EVENTS')
		if self.selectedLumis:
			result.append('LUMI_RANGE')
		return result


	# Get job dependent environment variables
	def getJobConfig(self, jobNum):
		data = DataTask.getJobConfig(self, jobNum)
		if self.dataSplitter is None:
			data['MAX_EVENTS'] = self.eventsPerJob
		if self.selectedLumis:
			data['LUMI_RANGE'] = self.getActiveLumiFilter(self.selectedLumis)
		return data


	def getDescription(self, jobNum): # (task name, job name, type)
		(taskName, jobName, jobType) = DataTask.getDescription(self, jobNum)
		return (taskName, jobName, QM(jobType, jobType, 'analysis'))


	def getDependencies(self):
		return DataTask.getDependencies(self) + ['cmssw']
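neededVars above translates the keys requested by the splitter into the placeholder names that must appear in the CMSSW config file, and the lumi filter adds LUMI_RANGE on top. A standalone sketch of that mapping, with plain strings standing in for the DataSplitter enum members:

_VAR_MAP = {'NEntries': 'MAX_EVENTS', 'Skipped': 'SKIP_EVENTS', 'FileList': 'FILE_NAMES'}

def _needed_vars(splitter_keys, lumi_filter_active):
	# Mirrors neededVars above: map splitter keys to placeholder names and
	# append LUMI_RANGE when a lumi filter is configured.
	result = [_VAR_MAP[key] for key in splitter_keys]
	if lumi_filter_active:
		result.append('LUMI_RANGE')
	return result

assert _needed_vars(['NEntries', 'FileList'], True) == ['MAX_EVENTS', 'FILE_NAMES', 'LUMI_RANGE']
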
Example #5
class CMSSW(DataTask):
	configSections = DataTask.configSections + ['CMSSW']

	def __init__(self, config, name):
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('partition processor', 'CMSPartitionProcessor LocationPartitionProcessor LumiPartitionProcessor')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		DataTask.__init__(self, config, name)
		self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))

		# SCRAM settings
		self._configureSCRAMSettings(config)

		self.useReqs = config.getBool('software requirements', True, onChange = None)
		self._projectAreaTarballSE = config.getBool(['se project area', 'se runtime'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		# Information about search order for software environment
		self.searchLoc = self._getCMSSWPaths(config)
		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		if self.dataSplitter is None:
			self.eventsPerJob = config.get('events per job', '0')
		fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
			autoPrepare = config.getBool('instrumentation', True),
			mustPrepare = (self.dataSplitter is not None))

		# Create project area tarball
		if not os.path.exists(self._projectAreaTarball):
			config.setState(True, 'init', detail = 'sandbox')
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					return
			# Generate CMSSW tarball
			if self.projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self.projectArea, self.pattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')


	def _configureSCRAMSettings(self, config):
		scramProject = config.getList('scram project', [])
		if len(scramProject):
			self.projectArea = config.getPath('project area', '')
			if len(self.projectArea):
				raise ConfigError('Cannot specify both SCRAM project and project area')
			if len(scramProject) != 2:
				raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
		else:
			self.projectArea = config.getPath('project area')

		if len(self.projectArea):
			defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
			self.pattern = config.getList('area files', defaultPattern.split())

			if os.path.exists(self.projectArea):
				utils.vprint('Project area found in: %s' % self.projectArea, -1)
			else:
				raise ConfigError('Specified config area %r does not exist!' % self.projectArea)

			scramPath = os.path.join(self.projectArea, '.SCRAM')
			# try to open it
			try:
				fp = open(os.path.join(scramPath, 'Environment'), 'r')
				self.scramEnv = utils.DictFormat().parse(fp, keyParser = {None: str})
			except Exception:
				raise ConfigError('Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)

			for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
				if key not in self.scramEnv:
					raise ConfigError('Installed program in project area not recognized.')

			archs = lfilter(lambda x: os.path.isdir(os.path.join(scramPath, x)) and not x.startswith('.'), os.listdir(scramPath))
			self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
			try:
				fp = open(os.path.join(scramPath, self.scramArch, 'Environment'), 'r')
				self.scramEnv.update(utils.DictFormat().parse(fp, keyParser = {None: str}))
			except Exception:
				raise ConfigError('Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch)
		else:
			self.scramEnv = {
				'SCRAM_PROJECTNAME': scramProject[0],
				'SCRAM_PROJECTVERSION': scramProject[1]
			}
			self.scramArch = config.get('scram arch')

		self.scramVersion = config.get('scram version', 'scramv1')
		if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')


	def _getCMSSWPaths(self, config):
		result = []
		if config.getState('init', detail = 'sandbox'):
			userPath = config.get('cmssw dir', '')
			if userPath != '':
				result.append(('CMSSW_DIR_USER', userPath))
			if self.scramEnv.get('RELEASETOP', None):
				projPath = os.path.normpath('%s/../../../../' % self.scramEnv['RELEASETOP'])
				result.append(('CMSSW_DIR_PRO', projPath))
		if result:
			utils.vprint('Local jobs will try to use the CMSSW software located here:', -1)
			for i, loc in enumerate(result):
				utils.vprint(' %i) %s' % (i + 1, loc[1]), -1)
		return result


	def _getConfigFiles(self, config):
		cfgDefault = utils.QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			if not os.path.exists(cfgFile):
				raise ConfigError('Config file %r not found.' % cfgFile)
			yield cfgFile


	def _cfgIsInstrumented(self, fn):
		fp = open(fn, 'r')
		try:
			cfg = fp.read()
		finally:
			fp.close()
		for tag in self.neededVars():
			if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
				return False
		return True


	def _cfgStore(self, source, target, fragment_path = None):
		fp = open(source, 'r')
		try:
			content = fp.read()
		finally:
			fp.close()
		fp = open(target, 'w')
		try:
			fp.write(content)
			if fragment_path:
				logging.getLogger('user').info('Instrumenting... %s', os.path.basename(source))
				fragment_fp = open(fragment_path, 'r')
				fp.write(fragment_fp.read())
				fragment_fp.close()
		finally:
			fp.close()


	def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare, mustPrepare):
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))

		cfgTodo = []
		cfgStatus = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			if cfg_new_exists:
				isInstrumented = self._cfgIsInstrumented(cfg_new)
				doCopy = False
			else:
				isInstrumented = self._cfgIsInstrumented(cfg)
				doCopy = True
			doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
			doCopy = doCopy or doPrepare
			if doCopy:
				cfgTodo.append((cfg, cfg_new, doPrepare))
			cfgStatus.append({1: cfg.split(comPath, 1)[1].lstrip('/'), 2: cfg_new_exists,
				3: isInstrumented, 4: doPrepare})

		utils.vprint('', -1)
		utils.printTabular([(1, 'Config file'), (2, 'Work dir'), (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus, 'lccc')
		utils.vprint('', -1)
		return cfgTodo


	def _processConfigFiles(self, config, cfgFiles, fragment_path, autoPrepare, mustPrepare):
		# process list of uninitialized config files
		for (cfg, cfg_new, doPrepare) in self._cfgFindUninitialized(config, cfgFiles, autoPrepare, mustPrepare):
			if doPrepare and (autoPrepare or utils.getUserBool('Do you want to prepare %s for running over the dataset?' % cfg, True)):
				self._cfgStore(cfg, cfg_new, fragment_path)
			else:
				self._cfgStore(cfg, cfg_new)

		result = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			if not os.path.exists(cfg_new):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfg)
			isInstrumented = self._cfgIsInstrumented(cfg_new)
			if mustPrepare and not isInstrumented:
				raise ConfigError('Config file %r must use %s to work properly!' %
					(cfg, str.join(', ', imap(lambda x: '@%s@' % x, self.neededVars()))))
			if autoPrepare and not isInstrumented:
				self._log.warning('Config file %r was not instrumented!', cfg)
			result.append(cfg_new)
		return result


	def neededVars(self):
		if self.dataSplitter:
			return self._dataPS.getNeededDataKeys()
		return []


	# Called on job submission
	def getSubmitInfo(self, jobNum):
		result = DataTask.getSubmitInfo(self, jobNum)
		result.update({'application': self.scramEnv['SCRAM_PROJECTVERSION'], 'exe': 'cmsRun'})
		if self.dataSplitter is None:
			result.update({'nevtJob': self.eventsPerJob})
		return result


	# Get environment variables for gc_config.sh
	def getTaskConfig(self):
		data = DataTask.getTaskConfig(self)
		data.update(dict(self.searchLoc))
		data['CMSSW_OLD_RELEASETOP'] = self.scramEnv.get('RELEASETOP', None)
		data['DB_EXEC'] = 'cmsRun'
		data['SCRAM_ARCH'] = self.scramArch
		data['SCRAM_VERSION'] = self.scramVersion
		data['SCRAM_PROJECTVERSION'] = self.scramEnv['SCRAM_PROJECTVERSION']
		data['GZIP_OUT'] = utils.QM(self.gzipOut, 'yes', 'no')
		data['SE_RUNTIME'] = utils.QM(self._projectAreaTarballSE, 'yes', 'no')
		data['HAS_RUNTIME'] = utils.QM(len(self.projectArea), 'yes', 'no')
		data['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self.configFiles))
		if self.prolog.isActive():
			data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
			data['CMSSW_PROLOG_SB_In_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.prolog.getSBInFiles()))
			data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
		if self.epilog.isActive():
			data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
			data['CMSSW_EPILOG_SB_In_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.epilog.getSBInFiles()))
			data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
		return data


	# Get job requirements
	def getRequirements(self, jobNum):
		reqs = DataTask.getRequirements(self, jobNum)
		if self.useReqs:
			reqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self.scramArch))
		return reqs


	# Get files to be transferred via SE (description, source, target)
	def getSEInFiles(self):
		files = DataTask.getSEInFiles(self)
		if len(self.projectArea) and self._projectAreaTarballSE:
			return files + [('CMSSW tarball', self._projectAreaTarball, self.taskID + '.tar.gz')]
		return files


	# Get files for input sandbox
	def getSBInFiles(self):
		files = DataTask.getSBInFiles(self) + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
		for cfgFile in self.configFiles:
			files.append(utils.Result(pathAbs = cfgFile, pathRel = os.path.basename(cfgFile)))
		if len(self.projectArea) and not self._projectAreaTarballSE:
			files.append(utils.Result(pathAbs = self._projectAreaTarball, pathRel = os.path.basename(self._projectAreaTarball)))
		return files + [utils.Result(pathAbs = utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'), pathRel = 'gc-run.cmssw.sh')]


	# Get files for output sandbox
	def getSBOutFiles(self):
		return DataTask.getSBOutFiles(self) + utils.QM(self.gzipOut, ['cmssw.log.gz'], []) + ['cmssw.dbs.tar.gz']


	def getCommand(self):
		return './gc-run.cmssw.sh $@'


	def getJobArguments(self, jobNum):
		return DataTask.getJobArguments(self, jobNum) + ' ' + self.arguments


	def getVarNames(self):
		result = DataTask.getVarNames(self)
		if self.dataSplitter is None:
			result.append('MAX_EVENTS')
		return result


	# Get job dependent environment variables
	def getJobConfig(self, jobNum):
		data = DataTask.getJobConfig(self, jobNum)
		if self.dataSplitter is None:
			data['MAX_EVENTS'] = self.eventsPerJob
		return data


	def getDescription(self, jobNum): # (task name, job name, type)
		result = DataTask.getDescription(self, jobNum)
		if not result.jobType:
			result.jobType = 'analysis'
		return result


	def getDependencies(self):
		return DataTask.getDependencies(self) + ['cmssw']
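_cfgFindUninitialized above decides, per config file, whether it still has to be copied into the work directory and whether it is scheduled for instrumentation; the decision reduces to two booleans. A standalone sketch of that per-file logic (function name illustrative):

def _copy_and_prepare(cfg_in_workdir, is_instrumented, must_prepare, auto_prepare):
	# Mirrors the per-file decision in _cfgFindUninitialized above.
	do_prepare = (must_prepare or auto_prepare) and not is_instrumented
	do_copy = (not cfg_in_workdir) or do_prepare
	return (do_copy, do_prepare)

# Already copied and instrumented -> nothing to do; missing and uninstrumented -> copy and prepare.
assert _copy_and_prepare(True, True, True, True) == (False, False)
assert _copy_and_prepare(False, False, True, False) == (True, True)
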
Example #6
class CMSSW(DataTask):
    configSections = DataTask.configSections + ['CMSSW']

    def __init__(self, config, name):
        config.set('se input timeout', '0:30')
        config.set('dataset provider', 'DBS3Provider')
        config.set('dataset splitter', 'EventBoundarySplitter')
        config.set(
            'partition processor',
            'CMSPartitionProcessor LocationPartitionProcessor LumiPartitionProcessor'
        )
        config.set('dataset processor', 'LumiDataProcessor', '+=')
        DataTask.__init__(self, config, name)
        self.updateErrorDict(
            utils.pathShare('gc-run.cmssw.sh', pkg='grid_control_cms'))

        # SCRAM settings
        self._configureSCRAMSettings(config)

        self.useReqs = config.getBool('software requirements',
                                      True,
                                      onChange=None)
        self._projectAreaTarballSE = config.getBool(
            ['se project area', 'se runtime'], True)
        self._projectAreaTarball = config.getWorkPath(
            'cmssw-project-area.tar.gz')

        # Information about search order for software environment
        self.searchLoc = self._getCMSSWPaths(config)
        # Prolog / Epilog script support - warn about old syntax
        self.prolog = TaskExecutableWrapper(config, 'prolog', '')
        self.epilog = TaskExecutableWrapper(config, 'epilog', '')
        if config.getPaths('executable', []) != []:
            raise ConfigError(
                'Prefix executable and argument options with either prolog or epilog!'
            )
        self.arguments = config.get('arguments', '')

        # Get cmssw config files and check their existence
        # Check that for dataset jobs the necessary placeholders are in the config file
        if self.dataSplitter is None:
            self.eventsPerJob = config.get('events per job', '0')
        fragment = config.getPath(
            'instrumentation fragment',
            utils.pathShare('fragmentForCMSSW.py', pkg='grid_control_cms'))
        self.configFiles = self._processConfigFiles(
            config,
            list(self._getConfigFiles(config)),
            fragment,
            autoPrepare=config.getBool('instrumentation', True),
            mustPrepare=(self.dataSplitter is not None))

        # Create project area tarball
        if not os.path.exists(self._projectAreaTarball):
            config.setState(True, 'init', detail='sandbox')
        if config.getState('init', detail='sandbox'):
            if os.path.exists(self._projectAreaTarball):
                if not utils.getUserBool(
                        'CMSSW tarball already exists! Do you want to regenerate it?',
                        True):
                    return
            # Generate CMSSW tarball
            if self.projectArea:
                utils.genTarball(
                    self._projectAreaTarball,
                    utils.matchFiles(self.projectArea, self.pattern))
            if self._projectAreaTarballSE:
                config.setState(True, 'init', detail='storage')

    def _configureSCRAMSettings(self, config):
        scramProject = config.getList('scram project', [])
        if len(scramProject):
            self.projectArea = config.getPath('project area', '')
            if len(self.projectArea):
                raise ConfigError(
                    'Cannot specify both SCRAM project and project area')
            if len(scramProject) != 2:
                raise ConfigError(
                    'SCRAM project needs exactly 2 arguments: PROJECT VERSION')
        else:
            self.projectArea = config.getPath('project area')

        if len(self.projectArea):
            defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
            self.pattern = config.getList('area files', defaultPattern.split())

            if os.path.exists(self.projectArea):
                utils.vprint('Project area found in: %s' % self.projectArea,
                             -1)
            else:
                raise ConfigError('Specified config area %r does not exist!' %
                                  self.projectArea)

            scramPath = os.path.join(self.projectArea, '.SCRAM')
            # try to open it
            try:
                fp = open(os.path.join(scramPath, 'Environment'), 'r')
                self.scramEnv = utils.DictFormat().parse(fp,
                                                         keyParser={None: str})
            except Exception:
                raise ConfigError(
                    'Project area file %s/.SCRAM/Environment cannot be parsed!'
                    % self.projectArea)

            for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
                if key not in self.scramEnv:
                    raise ConfigError(
                        'Installed program in project area not recognized.')

            archs = lfilter(
                lambda x: (os.path.isdir(os.path.join(scramPath, x))
                           and not x.startswith('.')), os.listdir(scramPath))
            self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
            try:
                fp = open(
                    os.path.join(scramPath, self.scramArch, 'Environment'),
                    'r')
                self.scramEnv.update(utils.DictFormat().parse(
                    fp, keyParser={None: str}))
            except Exception:
                raise ConfigError(
                    'Project area file .SCRAM/%s/Environment cannot be parsed!'
                    % self.scramArch)
        else:
            self.scramEnv = {
                'SCRAM_PROJECTNAME': scramProject[0],
                'SCRAM_PROJECTVERSION': scramProject[1]
            }
            self.scramArch = config.get('scram arch')

        self.scramVersion = config.get('scram version', 'scramv1')
        if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
            raise ConfigError('Project area contains no CMSSW project')

    def _getCMSSWPaths(self, config):
        result = []
        if config.getState('init', detail='sandbox'):
            userPath = config.get('cmssw dir', '')
            if userPath != '':
                result.append(('CMSSW_DIR_USER', userPath))
            if self.scramEnv.get('RELEASETOP', None):
                projPath = os.path.normpath('%s/../../../../' %
                                            self.scramEnv['RELEASETOP'])
                result.append(('CMSSW_DIR_PRO', projPath))
        if result:
            utils.vprint(
                'Local jobs will try to use the CMSSW software located here:',
                -1)
            for i, loc in enumerate(result):
                utils.vprint(' %i) %s' % (i + 1, loc[1]), -1)
        return result

    def _getConfigFiles(self, config):
        cfgDefault = utils.QM(self.prolog.isActive() or self.epilog.isActive(),
                              [], noDefault)
        for cfgFile in config.getPaths('config file',
                                       cfgDefault,
                                       mustExist=False):
            if not os.path.exists(cfgFile):
                raise ConfigError('Config file %r not found.' % cfgFile)
            yield cfgFile

    def _cfgIsInstrumented(self, fn):
        fp = open(fn, 'r')
        try:
            cfg = fp.read()
        finally:
            fp.close()
        for tag in self.neededVars():
            if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
                return False
        return True

    def _cfgStore(self, source, target, fragment_path=None):
        fp = open(source, 'r')
        try:
            content = fp.read()
        finally:
            fp.close()
        fp = open(target, 'w')
        try:
            fp.write(content)
            if fragment_path:
                logging.getLogger('user').info('Instrumenting... %s',
                                               os.path.basename(source))
                fragment_fp = open(fragment_path, 'r')
                fp.write(fragment_fp.read())
                fragment_fp.close()
        finally:
            fp.close()

    def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare,
                              mustPrepare):
        comPath = os.path.dirname(os.path.commonprefix(cfgFiles))

        cfgTodo = []
        cfgStatus = []
        for cfg in cfgFiles:
            cfg_new = config.getWorkPath(os.path.basename(cfg))
            cfg_new_exists = os.path.exists(cfg_new)
            if cfg_new_exists:
                isInstrumented = self._cfgIsInstrumented(cfg_new)
                doCopy = False
            else:
                isInstrumented = self._cfgIsInstrumented(cfg)
                doCopy = True
            doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
            doCopy = doCopy or doPrepare
            if doCopy:
                cfgTodo.append((cfg, cfg_new, doPrepare))
            cfgStatus.append({
                1: cfg.split(comPath, 1)[1].lstrip('/'),
                2: cfg_new_exists,
                3: isInstrumented,
                4: doPrepare
            })

        utils.vprint('', -1)
        utils.printTabular([(1, 'Config file'), (2, 'Work dir'),
                            (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus,
                           'lccc')
        utils.vprint('', -1)
        return cfgTodo

    def _processConfigFiles(self, config, cfgFiles, fragment_path, autoPrepare,
                            mustPrepare):
        # process list of uninitialized config files
        for (cfg, cfg_new,
             doPrepare) in self._cfgFindUninitialized(config, cfgFiles,
                                                      autoPrepare,
                                                      mustPrepare):
            if doPrepare and (autoPrepare or utils.getUserBool(
                    'Do you want to prepare %s for running over the dataset?' %
                    cfg, True)):
                self._cfgStore(cfg, cfg_new, fragment_path)
            else:
                self._cfgStore(cfg, cfg_new)

        result = []
        for cfg in cfgFiles:
            cfg_new = config.getWorkPath(os.path.basename(cfg))
            if not os.path.exists(cfg_new):
                raise ConfigError(
                    'Config file %r was not copied to the work directory!' %
                    cfg)
            isInstrumented = self._cfgIsInstrumented(cfg_new)
            if mustPrepare and not isInstrumented:
                raise ConfigError(
                    'Config file %r must use %s to work properly!' %
                    (cfg,
                     str.join(', ',
                              imap(lambda x: '@%s@' % x, self.neededVars()))))
            if autoPrepare and not isInstrumented:
                self._log.warning('Config file %r was not instrumented!', cfg)
            result.append(cfg_new)
        return result

    def neededVars(self):
        if self.dataSplitter:
            return self._dataPS.getNeededDataKeys()
        return []

    # Called on job submission
    def getSubmitInfo(self, jobNum):
        result = DataTask.getSubmitInfo(self, jobNum)
        result.update({
            'application': self.scramEnv['SCRAM_PROJECTVERSION'],
            'exe': 'cmsRun'
        })
        if self.dataSplitter is None:
            result.update({'nevtJob': self.eventsPerJob})
        return result

    # Get environment variables for gc_config.sh
    def getTaskConfig(self):
        data = DataTask.getTaskConfig(self)
        data.update(dict(self.searchLoc))
        data['CMSSW_OLD_RELEASETOP'] = self.scramEnv.get('RELEASETOP', None)
        data['DB_EXEC'] = 'cmsRun'
        data['SCRAM_ARCH'] = self.scramArch
        data['SCRAM_VERSION'] = self.scramVersion
        data['SCRAM_PROJECTVERSION'] = self.scramEnv['SCRAM_PROJECTVERSION']
        data['GZIP_OUT'] = utils.QM(self.gzipOut, 'yes', 'no')
        data['SE_RUNTIME'] = utils.QM(self._projectAreaTarballSE, 'yes', 'no')
        data['HAS_RUNTIME'] = utils.QM(len(self.projectArea), 'yes', 'no')
        data['CMSSW_CONFIG'] = str.join(
            ' ', imap(os.path.basename, self.configFiles))
        if self.prolog.isActive():
            data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
            data['CMSSW_PROLOG_SB_In_FILES'] = str.join(
                ' ', imap(lambda x: x.pathRel, self.prolog.getSBInFiles()))
            data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
        if self.epilog.isActive():
            data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
            data['CMSSW_EPILOG_SB_In_FILES'] = str.join(
                ' ', imap(lambda x: x.pathRel, self.epilog.getSBInFiles()))
            data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
        return data

    # Get job requirements
    def getRequirements(self, jobNum):
        reqs = DataTask.getRequirements(self, jobNum)
        if self.useReqs:
            reqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self.scramArch))
        return reqs

    # Get files to be transferred via SE (description, source, target)
    def getSEInFiles(self):
        files = DataTask.getSEInFiles(self)
        if len(self.projectArea) and self._projectAreaTarballSE:
            return files + [('CMSSW tarball', self._projectAreaTarball,
                             self.taskID + '.tar.gz')]
        return files

    # Get files for input sandbox
    def getSBInFiles(self):
        files = DataTask.getSBInFiles(
            self) + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
        for cfgFile in self.configFiles:
            files.append(
                utils.Result(pathAbs=cfgFile,
                             pathRel=os.path.basename(cfgFile)))
        if len(self.projectArea) and not self._projectAreaTarballSE:
            files.append(
                utils.Result(pathAbs=self._projectAreaTarball,
                             pathRel=os.path.basename(
                                 self._projectAreaTarball)))
        return files + [
            utils.Result(pathAbs=utils.pathShare('gc-run.cmssw.sh',
                                                 pkg='grid_control_cms'),
                         pathRel='gc-run.cmssw.sh')
        ]

    # Get files for output sandbox
    def getSBOutFiles(self):
        return DataTask.getSBOutFiles(self) + utils.QM(
            self.gzipOut, ['cmssw.log.gz'], []) + ['cmssw.dbs.tar.gz']

    def getCommand(self):
        return './gc-run.cmssw.sh $@'

    def getJobArguments(self, jobNum):
        return DataTask.getJobArguments(self, jobNum) + ' ' + self.arguments

    def getVarNames(self):
        result = DataTask.getVarNames(self)
        if self.dataSplitter is None:
            result.append('MAX_EVENTS')
        return result

    # Get job dependent environment variables
    def getJobConfig(self, jobNum):
        data = DataTask.getJobConfig(self, jobNum)
        if self.dataSplitter is None:
            data['MAX_EVENTS'] = self.eventsPerJob
        return data

    def getDescription(self, jobNum):  # (task name, job name, type)
        result = DataTask.getDescription(self, jobNum)
        if not result.jobType:
            result.jobType = 'analysis'
        return result

    def getDependencies(self):
        return DataTask.getDependencies(self) + ['cmssw']
Example #7
class CMSSW(SCRAMTask):
	configSections = SCRAMTask.configSections + ['CMSSW']

	def __init__(self, config, name):
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor ' +
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor')
		dash_config = config.changeView(viewClass = 'SimpleConfigView', setSections = ['dashboard'])
		dash_config.set('application', 'cmsRun')
		SCRAMTask.__init__(self, config, name)
		if self._scramProject != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		self._oldReleaseTop = None
		if self._projectArea:
			self._oldReleaseTop = self._parse_scram_file(os.path.join(self._projectArea, '.SCRAM', self._scramArch, 'Environment')).get('RELEASETOP', None)

		self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))

		self._projectAreaTarballSE = config.getBool(['se runtime', 'se project area'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		if self._dataSplitter is None:
			self.eventsPerJob = config.get('events per job', '0') # this can be a variable like @USER_EVENTS@!
		fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
			autoPrepare = config.getBool('instrumentation', True),
			mustPrepare = (self._dataSplitter is not None))

		# Create project area tarball
		if self._projectArea and not os.path.exists(self._projectAreaTarball):
			config.setState(True, 'init', detail = 'sandbox')
		# Information about search order for software environment
		self.searchLoc = self._getCMSSWPaths(config)
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					return
			# Generate CMSSW tarball
			if self._projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self._projectArea, self._projectAreaPattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')


	def _getCMSSWPaths(self, config):
		result = []
		userPath = config.get(['cmssw dir', 'vo software dir'], '')
		if userPath:
			userPathLocal = os.path.abspath(utils.cleanPath(userPath))
			if os.path.exists(userPathLocal):
				userPath = userPathLocal
		if userPath:
			result.append(('CMSSW_DIR_USER', userPath))
		if self._oldReleaseTop:
			projPath = os.path.normpath('%s/../../../../' % self._oldReleaseTop)
			result.append(('CMSSW_DIR_PRO', projPath))
		log = logging.getLogger('user')
		log.info('Local jobs will try to use the CMSSW software located here:')
		for i, loc in enumerate(result):
			log.info(' %i) %s', i + 1, loc[1])
		if result:
			log.info('')
		return result


	def _getConfigFiles(self, config):
		cfgDefault = utils.QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			if not os.path.exists(cfgFile):
				raise ConfigError('Config file %r not found.' % cfgFile)
			yield cfgFile


	def _cfgIsInstrumented(self, fn):
		fp = open(fn, 'r')
		try:
			cfg = fp.read()
		finally:
			fp.close()
		for tag in self.neededVars():
			if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
				return False
		return True


	def _cfgStore(self, source, target, fragment_path = None):
		fp = open(source, 'r')
		try:
			content = fp.read()
		finally:
			fp.close()
		fp = open(target, 'w')
		try:
			fp.write(content)
			if fragment_path:
				logging.getLogger('user').info('Instrumenting... %s', os.path.basename(source))
				fragment_fp = open(fragment_path, 'r')
				fp.write(fragment_fp.read())
				fragment_fp.close()
		finally:
			fp.close()


	def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare, mustPrepare):
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))

		cfgTodo = []
		cfgStatus = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			if cfg_new_exists:
				isInstrumented = self._cfgIsInstrumented(cfg_new)
				doCopy = False
			else:
				isInstrumented = self._cfgIsInstrumented(cfg)
				doCopy = True
			doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
			doCopy = doCopy or doPrepare
			if doCopy:
				cfgTodo.append((cfg, cfg_new, doPrepare))
			cfgStatus.append({1: cfg.split(comPath, 1)[1].lstrip('/'), 2: cfg_new_exists,
				3: isInstrumented, 4: doPrepare})

		if cfgStatus:
			utils.printTabular([(1, 'Config file'), (2, 'Work dir'), (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus, 'lccc')
		return cfgTodo


	def _processConfigFiles(self, config, cfgFiles, fragment_path, autoPrepare, mustPrepare):
		# process list of uninitialized config files
		for (cfg, cfg_new, doPrepare) in self._cfgFindUninitialized(config, cfgFiles, autoPrepare, mustPrepare):
			if doPrepare and (autoPrepare or utils.getUserBool('Do you want to prepare %s for running over the dataset?' % cfg, True)):
				self._cfgStore(cfg, cfg_new, fragment_path)
			else:
				self._cfgStore(cfg, cfg_new)

		result = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			if not os.path.exists(cfg_new):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfg)
			isInstrumented = self._cfgIsInstrumented(cfg_new)
			if mustPrepare and not isInstrumented:
				raise ConfigError('Config file %r must use %s to work properly!' %
					(cfg, str.join(', ', imap(lambda x: '@%s@' % x, self.neededVars()))))
			if autoPrepare and not isInstrumented:
				self._log.warning('Config file %r was not instrumented!', cfg)
			result.append(cfg_new)
		return result


	def neededVars(self):
		if self._dataSplitter:
			return self._partProcessor.getNeededKeys(self._dataSplitter) or []
		return ['MAX_EVENTS']


	# Get environment variables for gc_config.sh
	def getTaskConfig(self):
		data = SCRAMTask.getTaskConfig(self)
		data.update(dict(self.searchLoc))
		data['GZIP_OUT'] = utils.QM(self.gzipOut, 'yes', 'no')
		data['SE_RUNTIME'] = utils.QM(self._projectAreaTarballSE, 'yes', 'no')
		data['HAS_RUNTIME'] = utils.QM(self._projectArea, 'yes', 'no')
		data['CMSSW_EXEC'] = 'cmsRun'
		data['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self.configFiles))
		data['CMSSW_OLD_RELEASETOP'] = self._oldReleaseTop
		if self.prolog.isActive():
			data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
			data['CMSSW_PROLOG_SB_IN_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.prolog.getSBInFiles()))
			data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
		if self.epilog.isActive():
			data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
			data['CMSSW_EPILOG_SB_IN_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.epilog.getSBInFiles()))
			data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
		return data


	# Get files to be transferred via SE (description, source, target)
	def getSEInFiles(self):
		files = SCRAMTask.getSEInFiles(self)
		if self._projectArea and self._projectAreaTarballSE:
			return files + [('CMSSW tarball', self._projectAreaTarball, self.taskID + '.tar.gz')]
		return files


	# Get files for input sandbox
	def getSBInFiles(self):
		files = SCRAMTask.getSBInFiles(self) + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
		for cfgFile in self.configFiles:
			files.append(utils.Result(pathAbs = cfgFile, pathRel = os.path.basename(cfgFile)))
		if self._projectArea and not self._projectAreaTarballSE:
			files.append(utils.Result(pathAbs = self._projectAreaTarball, pathRel = os.path.basename(self._projectAreaTarball)))
		return files + [utils.Result(pathAbs = utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'), pathRel = 'gc-run.cmssw.sh')]


	# Get files for output sandbox
	def getSBOutFiles(self):
		if not self.configFiles:
			return SCRAMTask.getSBOutFiles(self)
		return SCRAMTask.getSBOutFiles(self) + utils.QM(self.gzipOut, ['cmssw.log.gz'], []) + ['cmssw.dbs.tar.gz']


	def getCommand(self):
		return './gc-run.cmssw.sh $@'


	def getJobArguments(self, jobNum):
		return SCRAMTask.getJobArguments(self, jobNum) + ' ' + self.arguments


	def getVarNames(self):
		result = SCRAMTask.getVarNames(self)
		if self._dataSplitter is None:
			result.append('MAX_EVENTS')
		return result


	# Get job dependent environment variables
	def getJobConfig(self, jobNum):
		data = SCRAMTask.getJobConfig(self, jobNum)
		if self._dataSplitter is None:
			data['MAX_EVENTS'] = self.eventsPerJob
		return data


	def getDescription(self, jobNum): # (task name, job name, type)
		result = SCRAMTask.getDescription(self, jobNum)
		if not result.jobType:
			result.jobType = 'analysis'
		return result
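The CMSSW_DIR_PRO entry in _getCMSSWPaths above is derived by stepping four directory levels up from the release's RELEASETOP. A small sketch with a made-up CVMFS-style path (the real value comes from the project area's .SCRAM/<arch>/Environment file):

import os.path

# Illustrative RELEASETOP value only.
release_top = '/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/cmssw/CMSSW_10_6_30'
proj_path = os.path.normpath('%s/../../../../' % release_top)
assert proj_path == '/cvmfs/cms.cern.ch'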