Exemplo n.º 1
0
class UserTask(DataTask):
    # Task that runs a user supplied executable and returns its stdout / stderr files
    alias_list = ['UserMod', 'user', 'script']
    config_section_list = DataTask.config_section_list + ['UserMod', 'UserTask']

    def __init__(self, config, name):
        DataTask.__init__(self, config, name)
        # Wrapper used below to query command, arguments and sandbox files of the executable
        self._exe = TaskExecutableWrapper(config)

    def get_command(self):
        # Run the executable in a subshell and redirect both output streams into files
        exe_command = self._exe.get_command()
        return '(%s) > job.stdout 2> job.stderr' % exe_command

    def get_job_arguments(self, jobnum):
        # Arguments of the base task followed by the arguments of the executable
        base_arguments = DataTask.get_job_arguments(self, jobnum)
        return base_arguments + ' ' + self._exe.get_arguments()

    def get_sb_in_fpi_list(self):
        # Input sandbox entries of the base task plus those of the executable
        base_fpi_list = DataTask.get_sb_in_fpi_list(self)
        return base_fpi_list + self._exe.get_sb_in_fpi_list()

    def get_sb_out_fn_list(self):
        # The stream files carry a '.gz' suffix when _do_gzip_std_output is set
        def _append_gz(fn):
            return fn + '.gz'

        stream_fn_list = ['job.stdout', 'job.stderr']
        if self._do_gzip_std_output:
            stream_fn_list = lmap(_append_gz, stream_fn_list)
        base_fn_list = DataTask.get_sb_out_fn_list(self)
        return base_fn_list + stream_fn_list
Exemplo n.º 2
0
	def __init__(self, config, name):
		# Set up the CMSSW task: preset config options, validate the SCRAM project,
		# prepare the cmsRun config files and (re)generate the project area tarball.
		# The config.set calls are issued before SCRAMTask.__init__ runs on the same config object
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor ' +
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor')
		# Set 'application' to cmsRun through a config view created for the 'dashboard' section
		dash_config = config.changeView(viewClass = 'SimpleConfigView', setSections = ['dashboard'])
		dash_config.set('application', 'cmsRun')
		SCRAMTask.__init__(self, config, name)
		# NOTE(review): _scramProject, _projectArea, _scramArch, _projectAreaPattern and _dataSplitter
		# are not assigned in this method - presumably set by SCRAMTask.__init__ or its bases; confirm
		if self._scramProject != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		# Read RELEASETOP from the SCRAM 'Environment' file of the project area (stays None without project area)
		self._oldReleaseTop = None
		if self._projectArea:
			self._oldReleaseTop = self._parse_scram_file(os.path.join(self._projectArea, '.SCRAM', self._scramArch, 'Environment')).get('RELEASETOP', None)

		self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))

		# Option (two accepted names) deciding whether the tarball is flagged for the 'storage' init state below
		self._projectAreaTarballSE = config.getBool(['se runtime', 'se project area'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - the old unprefixed 'executable' option is rejected with an error
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		# (eventsPerJob is only defined when there is no data splitter)
		if self._dataSplitter is None:
			self.eventsPerJob = config.get('events per job', '0') # this can be a variable like @USER_EVENTS@!
		fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
			autoPrepare = config.getBool('instrumentation', True),
			mustPrepare = (self._dataSplitter is not None))

		# Create project area tarball - a missing tarball switches on the 'sandbox' init state
		if self._projectArea and not os.path.exists(self._projectAreaTarball):
			config.setState(True, 'init', detail = 'sandbox')
		# Information about search order for software environment
		self.searchLoc = self._getCMSSWPaths(config)
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					# User declined: keep the existing tarball; the 'storage' state below is not touched either
					return
			# Generate CMSSW tarball
			if self._projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self._projectArea, self._projectAreaPattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')
Exemplo n.º 3
0
	def __init__(self, config, name):
		# Set up the CMSSW task: preset config options, validate the SCRAM project,
		# prepare the cmsRun config files and (re)generate the project area tarball.
		# The config.set calls are issued before SCRAMTask.__init__ runs on the same config object
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor ' +
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor')
		# Set 'application' to cmsRun through a config view created for the 'dashboard' section
		dash_config = config.changeView(viewClass = 'SimpleConfigView', setSections = ['dashboard'])
		dash_config.set('application', 'cmsRun')
		SCRAMTask.__init__(self, config, name)
		# NOTE(review): _scramProject, _projectArea, _scramArch, _projectAreaPattern and _dataSplitter
		# are not assigned in this method - presumably set by SCRAMTask.__init__ or its bases; confirm
		if self._scramProject != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		# Read RELEASETOP from the SCRAM 'Environment' file of the project area (stays None without project area)
		self._oldReleaseTop = None
		if self._projectArea:
			self._oldReleaseTop = self._parse_scram_file(os.path.join(self._projectArea, '.SCRAM', self._scramArch, 'Environment')).get('RELEASETOP', None)

		self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))

		# Option (two accepted names) deciding whether the tarball is flagged for the 'storage' init state below
		self._projectAreaTarballSE = config.getBool(['se runtime', 'se project area'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - the old unprefixed 'executable' option is rejected with an error
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		# (eventsPerJob is only defined when there is no data splitter)
		if self._dataSplitter is None:
			self.eventsPerJob = config.get('events per job', '0') # this can be a variable like @USER_EVENTS@!
		fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
			autoPrepare = config.getBool('instrumentation', True),
			mustPrepare = (self._dataSplitter is not None))

		# Create project area tarball - a missing tarball switches on the 'sandbox' init state
		if self._projectArea and not os.path.exists(self._projectAreaTarball):
			config.setState(True, 'init', detail = 'sandbox')
		# Information about search order for software environment
		self.searchLoc = self._getCMSSWPaths(config)
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					# User declined: keep the existing tarball; the 'storage' state below is not touched either
					return
			# Generate CMSSW tarball
			if self._projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self._projectArea, self._projectAreaPattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')
Exemplo n.º 4
0
	def __init__(self, config, name):
		# Set up the CMSSW task: preset config options, configure SCRAM, prepare the
		# cmsRun config files and (re)generate the project area tarball.
		# The config.set calls are issued before DataTask.__init__ runs on the same config object
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('partition processor', 'CMSPartitionProcessor LocationPartitionProcessor LumiPartitionProcessor')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		DataTask.__init__(self, config, name)
		self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))

		# SCRAM settings
		# NOTE(review): projectArea and pattern used at the end of this method are not assigned here -
		# presumably set by _configureSCRAMSettings; confirm
		self._configureSCRAMSettings(config)

		self.useReqs = config.getBool('software requirements', True, onChange = None)
		# Option (two accepted names) deciding whether the tarball is flagged for the 'storage' init state below
		self._projectAreaTarballSE = config.getBool(['se project area', 'se runtime'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		# Information about search order for software environment
		self.searchLoc = self._getCMSSWPaths(config)
		# Prolog / Epilog script support - the old unprefixed 'executable' option is rejected with an error
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		# (eventsPerJob is only defined when there is no data splitter)
		if self.dataSplitter is None:
			self.eventsPerJob = config.get('events per job', '0')
		fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
			autoPrepare = config.getBool('instrumentation', True),
			mustPrepare = (self.dataSplitter is not None))

		# Create project area tarball - a missing tarball switches on the 'sandbox' init state
		# (unlike the tarball generation below, this check does not depend on projectArea)
		if not os.path.exists(self._projectAreaTarball):
			config.setState(True, 'init', detail = 'sandbox')
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					# User declined: keep the existing tarball; the 'storage' state below is not touched either
					return
			# Generate CMSSW tarball
			if self.projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self.projectArea, self.pattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')
Exemplo n.º 5
0
class UserTask(DataTask):
	# Task built around a user defined executable wrapped by TaskExecutableWrapper
	alias_list = ['UserMod', 'user', 'script']
	config_section_list = DataTask.config_section_list + [
		'UserMod',
		'UserTask',
	]

	def __init__(self, config, name):
		DataTask.__init__(self, config, name)
		self._exe = TaskExecutableWrapper(config)

	def get_command(self):
		# Subshell call with stdout / stderr redirected into job.stdout / job.stderr
		command_template = '(%s) > job.stdout 2> job.stderr'
		return command_template % self._exe.get_command()

	def get_job_arguments(self, jobnum):
		# Base task arguments and executable arguments, separated by a single space
		task_arguments = DataTask.get_job_arguments(self, jobnum)
		exe_arguments = self._exe.get_arguments()
		return task_arguments + ' ' + exe_arguments

	def get_sb_in_fpi_list(self):
		# Input sandbox entries of the base task extended by those of the executable
		task_fpi_list = DataTask.get_sb_in_fpi_list(self)
		exe_fpi_list = self._exe.get_sb_in_fpi_list()
		return task_fpi_list + exe_fpi_list

	def get_sb_out_fn_list(self):
		# Output sandbox of the base task plus the two stream files ('.gz' variants if gzip is enabled)
		task_fn_list = DataTask.get_sb_out_fn_list(self)
		if not self._do_gzip_std_output:
			return task_fn_list + ['job.stdout', 'job.stderr']
		return task_fn_list + lmap(lambda std_fn: std_fn + '.gz', ['job.stdout', 'job.stderr'])
Exemplo n.º 6
0
class UserTask(DataTask):
    # Task that runs a user supplied executable and returns its stdout / stderr files
    alias = ['UserMod']
    configSections = DataTask.configSections + ['UserMod', 'UserTask']

    def __init__(self, config, name):
        DataTask.__init__(self, config, name)
        # Wrapper used below to query command, arguments and sandbox files of the executable
        self._exeWrap = TaskExecutableWrapper(config)

    def getCommand(self):
        # Run the executable in a subshell and redirect both output streams into files
        exeCommand = self._exeWrap.getCommand()
        return '(%s) > job.stdout 2> job.stderr' % exeCommand

    def getJobArguments(self, jobNum):
        # Arguments of the base task followed by the arguments of the executable
        baseArguments = DataTask.getJobArguments(self, jobNum)
        return baseArguments + ' ' + self._exeWrap.getArguments()

    def getSBInFiles(self):
        # Input sandbox entries of the base task plus those of the executable
        baseFiles = DataTask.getSBInFiles(self)
        return baseFiles + self._exeWrap.getSBInFiles()

    def getSBOutFiles(self):
        # utils.QM selects the '.gz' suffix when self.gzipOut is set, otherwise no suffix
        suffix = utils.QM(self.gzipOut, '.gz', '')
        streamFiles = lmap(lambda fn: fn + suffix, ['job.stdout', 'job.stderr'])
        return DataTask.getSBOutFiles(self) + streamFiles
Exemplo n.º 7
0
class UserTask(DataTask):
	# Task that runs a user supplied executable and returns its stdout / stderr files
	configSections = DataTask.configSections + ['UserTask']

	def __init__(self, config, name):
		DataTask.__init__(self, config, name)
		# Wrapper used below to query command, arguments and sandbox files of the executable
		self._exeWrap = TaskExecutableWrapper(config)


	# Run the executable in a subshell and redirect both output streams into files
	def getCommand(self):
		return '(%s) > job.stdout 2> job.stderr' % self._exeWrap.getCommand()


	# Arguments of the base task followed by the arguments of the executable
	def getJobArguments(self, jobNum):
		return DataTask.getJobArguments(self, jobNum) + ' ' + self._exeWrap.getArguments()


	# Input sandbox entries of the base task plus those of the executable
	def getSBInFiles(self):
		return DataTask.getSBInFiles(self) + self._exeWrap.getSBInFiles()


	# Output sandbox of the base task plus the stream files ('.gz' suffix if self.gzipOut is set)
	def getSBOutFiles(self):
		suffix = utils.QM(self.gzipOut, '.gz', '')
		# Build a real list: the builtin map returns an iterator on Python 3,
		# which cannot be concatenated to the list of the base class
		tmp = [fn + suffix for fn in ['job.stdout', 'job.stderr']]
		return DataTask.getSBOutFiles(self) + tmp
Exemplo n.º 8
0
class CMSSW(SCRAMTask):
	# Task module that runs cmsRun through the gc-run.cmssw.sh script, based on a SCRAM project area
	configSections = SCRAMTask.configSections + ['CMSSW']

	def __init__(self, config, name):
		# Preset config options; these calls are issued before SCRAMTask.__init__ runs on the same config object
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor ' +
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor')
		# Set 'application' to cmsRun through a config view created for the 'dashboard' section
		dash_config = config.changeView(viewClass = 'SimpleConfigView', setSections = ['dashboard'])
		dash_config.set('application', 'cmsRun')
		SCRAMTask.__init__(self, config, name)
		# NOTE(review): _scramProject, _projectArea, _scramArch, _projectAreaPattern and _dataSplitter
		# are not assigned in this class - presumably set by SCRAMTask.__init__ or its bases; confirm
		if self._scramProject != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		# Read RELEASETOP from the SCRAM 'Environment' file of the project area (stays None without project area)
		self._oldReleaseTop = None
		if self._projectArea:
			self._oldReleaseTop = self._parse_scram_file(os.path.join(self._projectArea, '.SCRAM', self._scramArch, 'Environment')).get('RELEASETOP', None)

		self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))

		# Decides between SE transfer (getSEInFiles) and input sandbox (getSBInFiles) for the tarball
		self._projectAreaTarballSE = config.getBool(['se runtime', 'se project area'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - the old unprefixed 'executable' option is rejected with an error
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		# (eventsPerJob is only defined when there is no data splitter)
		if self._dataSplitter is None:
			self.eventsPerJob = config.get('events per job', '0') # this can be a variable like @USER_EVENTS@!
		fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
			autoPrepare = config.getBool('instrumentation', True),
			mustPrepare = (self._dataSplitter is not None))

		# Create project area tarball - a missing tarball switches on the 'sandbox' init state
		if self._projectArea and not os.path.exists(self._projectAreaTarball):
			config.setState(True, 'init', detail = 'sandbox')
		# Information about search order for software environment
		self.searchLoc = self._getCMSSWPaths(config)
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					# User declined: keep the existing tarball; the 'storage' state below is not touched either
					return
			# Generate CMSSW tarball
			if self._projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self._projectArea, self._projectAreaPattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')


	# Build the list of (variable name, path) pairs describing where the CMSSW software is searched;
	# the list is also written to the 'user' log
	def _getCMSSWPaths(self, config):
		result = []
		userPath = config.get(['cmssw dir', 'vo software dir'], '')
		if userPath:
			# Use the absolute local path if it exists, otherwise keep the configured value as given
			userPathLocal = os.path.abspath(utils.cleanPath(userPath))
			if os.path.exists(userPathLocal):
				userPath = userPathLocal
		if userPath:
			result.append(('CMSSW_DIR_USER', userPath))
		if self._oldReleaseTop:
			# Directory four levels above the release top of the project area
			projPath = os.path.normpath('%s/../../../../' % self._oldReleaseTop)
			result.append(('CMSSW_DIR_PRO', projPath))
		log = logging.getLogger('user')
		log.info('Local jobs will try to use the CMSSW software located here:')
		for i, loc in enumerate(result):
			log.info(' %i) %s', i + 1, loc[1])
		if result:
			log.info('')
		return result


	# Yield the configured cmsRun config files; raises ConfigError for a missing file.
	# The option only defaults to an empty list if a prolog or epilog executable is active
	def _getConfigFiles(self, config):
		cfgDefault = utils.QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			if not os.path.exists(cfgFile):
				raise ConfigError('Config file %r not found.' % cfgFile)
			yield cfgFile


	# Return True if the file contains a __VAR__ or @VAR@ placeholder for every needed variable
	def _cfgIsInstrumented(self, fn):
		fp = open(fn, 'r')
		try:
			cfg = fp.read()
		finally:
			fp.close()
		for tag in self.neededVars():
			if ('__%s__' % tag not in cfg) and ('@%s@' % tag not in cfg):
				return False
		return True


	# Copy source to target; if fragment_path is given, its content is appended to the copy
	def _cfgStore(self, source, target, fragment_path = None):
		fp = open(source, 'r')
		try:
			content = fp.read()
		finally:
			fp.close()
		fp = open(target, 'w')
		try:
			fp.write(content)
			if fragment_path:
				logging.getLogger('user').info('Instrumenting... %s', os.path.basename(source))
				fragment_fp = open(fragment_path, 'r')
				try:
					fp.write(fragment_fp.read())
				finally:
					# close the fragment file even if reading or writing fails
					fragment_fp.close()
		finally:
			fp.close()


	# Determine which config files have to be copied to the work directory and / or instrumented.
	# Prints a status table and returns a list of (source, work dir target, doPrepare) tuples
	def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare, mustPrepare):
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))

		cfgTodo = []
		cfgStatus = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			if cfg_new_exists:
				# An existing work dir copy is checked instead of the original file
				isInstrumented = self._cfgIsInstrumented(cfg_new)
				doCopy = False
			else:
				isInstrumented = self._cfgIsInstrumented(cfg)
				doCopy = True
			doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
			doCopy = doCopy or doPrepare
			if doCopy:
				cfgTodo.append((cfg, cfg_new, doPrepare))
			# comPath is a prefix of every entry - strip it by slicing, since str.split
			# raises ValueError for an empty separator (no common directory)
			cfgStatus.append({1: cfg[len(comPath):].lstrip('/'), 2: cfg_new_exists,
				3: isInstrumented, 4: doPrepare})

		if cfgStatus:
			utils.printTabular([(1, 'Config file'), (2, 'Work dir'), (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus, 'lccc')
		return cfgTodo


	# Copy / instrument the config files as needed and return the list of work directory copies.
	# Raises ConfigError if a copy is missing or (with mustPrepare) not instrumented
	def _processConfigFiles(self, config, cfgFiles, fragment_path, autoPrepare, mustPrepare):
		# process list of uninitialized config files
		for (cfg, cfg_new, doPrepare) in self._cfgFindUninitialized(config, cfgFiles, autoPrepare, mustPrepare):
			# The user is only asked if automatic preparation is switched off
			if doPrepare and (autoPrepare or utils.getUserBool('Do you want to prepare %s for running over the dataset?' % cfg, True)):
				self._cfgStore(cfg, cfg_new, fragment_path)
			else:
				self._cfgStore(cfg, cfg_new)

		result = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			if not os.path.exists(cfg_new):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfg)
			isInstrumented = self._cfgIsInstrumented(cfg_new)
			if mustPrepare and not isInstrumented:
				raise ConfigError('Config file %r must use %s to work properly!' %
					(cfg, str.join(', ', imap(lambda x: '@%s@' % x, self.neededVars()))))
			if autoPrepare and not isInstrumented:
				self._log.warning('Config file %r was not instrumented!', cfg)
			result.append(cfg_new)
		return result


	# Names of the variables that have to show up as placeholders in the config files
	def neededVars(self):
		if self._dataSplitter:
			return self._partProcessor.getNeededKeys(self._dataSplitter) or []
		return ['MAX_EVENTS']


	# Get environment variables for gc_config.sh
	def getTaskConfig(self):
		data = SCRAMTask.getTaskConfig(self)
		data.update(dict(self.searchLoc))
		data['GZIP_OUT'] = utils.QM(self.gzipOut, 'yes', 'no')
		data['SE_RUNTIME'] = utils.QM(self._projectAreaTarballSE, 'yes', 'no')
		data['HAS_RUNTIME'] = utils.QM(self._projectArea, 'yes', 'no')
		data['CMSSW_EXEC'] = 'cmsRun'
		data['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self.configFiles))
		data['CMSSW_OLD_RELEASETOP'] = self._oldReleaseTop
		# Prolog / epilog settings are only exported for active executables
		if self.prolog.isActive():
			data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
			data['CMSSW_PROLOG_SB_IN_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.prolog.getSBInFiles()))
			data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
		if self.epilog.isActive():
			data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
			data['CMSSW_EPILOG_SB_IN_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.epilog.getSBInFiles()))
			data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
		return data


	# Get files to be transferred via SE (description, source, target)
	def getSEInFiles(self):
		files = SCRAMTask.getSEInFiles(self)
		if self._projectArea and self._projectAreaTarballSE:
			return files + [('CMSSW tarball', self._projectAreaTarball, self.taskID + '.tar.gz')]
		return files


	# Get files for input sandbox
	def getSBInFiles(self):
		files = SCRAMTask.getSBInFiles(self) + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
		for cfgFile in self.configFiles:
			files.append(utils.Result(pathAbs = cfgFile, pathRel = os.path.basename(cfgFile)))
		# The tarball only goes into the sandbox if it is not transferred via SE
		if self._projectArea and not self._projectAreaTarballSE:
			files.append(utils.Result(pathAbs = self._projectAreaTarball, pathRel = os.path.basename(self._projectAreaTarball)))
		return files + [utils.Result(pathAbs = utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'), pathRel = 'gc-run.cmssw.sh')]


	# Get files for output sandbox (cmssw files are only added if config files are set)
	def getSBOutFiles(self):
		if not self.configFiles:
			return SCRAMTask.getSBOutFiles(self)
		return SCRAMTask.getSBOutFiles(self) + utils.QM(self.gzipOut, ['cmssw.log.gz'], []) + ['cmssw.dbs.tar.gz']


	# Command line calling the run script with "$@"
	def getCommand(self):
		return './gc-run.cmssw.sh $@'


	# Arguments of the base task followed by the 'arguments' option
	def getJobArguments(self, jobNum):
		return SCRAMTask.getJobArguments(self, jobNum) + ' ' + self.arguments


	# Variable names of the base task; MAX_EVENTS is added if there is no data splitter
	def getVarNames(self):
		result = SCRAMTask.getVarNames(self)
		if self._dataSplitter is None:
			result.append('MAX_EVENTS')
		return result


	# Get job dependent environment variables
	def getJobConfig(self, jobNum):
		data = SCRAMTask.getJobConfig(self, jobNum)
		if self._dataSplitter is None:
			data['MAX_EVENTS'] = self.eventsPerJob
		return data


	# Job description of the base task; an unset job type defaults to 'analysis'
	def getDescription(self, jobNum): # (task name, job name, type)
		result = SCRAMTask.getDescription(self, jobNum)
		if not result.jobType:
			result.jobType = 'analysis'
		return result
Exemplo n.º 9
0
	def __init__(self, config, name):
		# Set up the CMSSW task: preset config options, validate the SCRAM project,
		# prepare the cmsRun config files and (re)generate the project area tarball.
		# The config.set calls are issued before SCRAMTask.__init__ runs on the same config object
		config.set('se input timeout', '0:30')
		config.set('application', 'cmsRun', section='dashboard')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'BasicPartitionProcessor', '-=')
		config.set('partition processor',
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor', '+=')

		# Created before the base constructor call
		# NOTE(review): presumably because SCRAMTask.__init__ may already fill this set - confirm
		self._needed_vn_set = set()
		SCRAMTask.__init__(self, config, name)
		self._uii = UserInputInterface()

		# Setup file path information
		# NOTE(review): _do_gzip_std_output, _scram_project, _project_area, _scram_arch and _has_dataset
		# are not assigned in this method - presumably set by SCRAMTask.__init__ or its bases; confirm
		self._cmsrun_output_files = ['cmssw.dbs.tar.gz']
		if self._do_gzip_std_output:
			self._cmsrun_output_files.append('cmssw.log.gz')
		self._script_fpi = Result(path_rel='gc-run.cmssw.sh',
			path_abs=get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

		if self._scram_project != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		# Read RELEASETOP from the SCRAM 'Environment' file of the project area (stays None without project area)
		self._old_release_top = None
		if self._project_area:
			scram_arch_env_path = os.path.join(self._project_area, '.SCRAM', self._scram_arch, 'Environment')
			self._old_release_top = self._parse_scram_file(scram_arch_env_path).get('RELEASETOP')

		self._update_map_error_code2msg(
			get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

		# Option (two accepted names) deciding whether the tarball is flagged for the 'storage' init state below
		self._project_area_tarball_on_se = config.get_bool(['se runtime', 'se project area'], True)
		self._project_area_tarball = config.get_work_path('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - the old unprefixed 'executable' option is rejected with an error
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.get_fn_list('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		# (_events_per_job is only defined for tasks without dataset)
		if not self._has_dataset:
			self._events_per_job = config.get('events per job', '0')
			# this can be a variable like @USER_EVENTS@!
			self._needed_vn_set.add('MAX_EVENTS')
		fragment = config.get_fn('instrumentation fragment',
			get_path_share('fragmentForCMSSW.py', pkg='grid_control_cms'))
		self._config_fn_list = self._process_config_file_list(config,
			config.get_fn_list('config file', self._get_config_file_default()),
			fragment, auto_prepare=config.get_bool('instrumentation', True),
			must_prepare=self._has_dataset)

		# Create project area tarball - a missing tarball switches on the 'sandbox' init state
		if self._project_area and not os.path.exists(self._project_area_tarball):
			config.set_state(True, 'init', detail='sandbox')
		# Information about search order for software environment
		self._cmssw_search_dict = self._get_cmssw_path_list(config)
		if config.get_state('init', detail='sandbox'):
			msg = 'CMSSW tarball already exists! Do you want to regenerate it?'
			# If the user declines, the existing tarball is kept and the 'storage' state below is not touched
			if os.path.exists(self._project_area_tarball) and not self._uii.prompt_bool(msg, True):
				return
			# Generate CMSSW tarball
			if self._project_area:
				create_tarball(_match_files(self._project_area,
					self._project_area_matcher, self._always_matcher.create_matcher(''),
					self._project_area_base_fn), name=self._project_area_tarball)
			if self._project_area_tarball_on_se:
				config.set_state(True, 'init', detail='storage')
Exemplo n.º 10
0
class CMSSW(SCRAMTask):
	alias_list = ['']
	config_section_list = SCRAMTask.config_section_list + ['CMSSW']

	def __init__(self, config, name):
		# Set up the CMSSW task: preset config options, validate the SCRAM project,
		# prepare the cmsRun config files and (re)generate the project area tarball.
		# The config.set calls are issued before SCRAMTask.__init__ runs on the same config object
		config.set('se input timeout', '0:30')
		config.set('application', 'cmsRun', section='dashboard')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'BasicPartitionProcessor', '-=')
		config.set('partition processor',
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor', '+=')

		# Created before the base constructor call: _create_datasource updates this set
		# NOTE(review): presumably _create_datasource is invoked from SCRAMTask.__init__ - confirm
		self._needed_vn_set = set()
		SCRAMTask.__init__(self, config, name)
		self._uii = UserInputInterface()

		# Setup file path information
		# NOTE(review): _do_gzip_std_output, _scram_project, _project_area, _scram_arch and _has_dataset
		# are not assigned in this class - presumably set by SCRAMTask.__init__ or its bases; confirm
		self._cmsrun_output_files = ['cmssw.dbs.tar.gz']
		if self._do_gzip_std_output:
			self._cmsrun_output_files.append('cmssw.log.gz')
		self._script_fpi = Result(path_rel='gc-run.cmssw.sh',
			path_abs=get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

		if self._scram_project != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		# Read RELEASETOP from the SCRAM 'Environment' file of the project area (stays None without project area)
		self._old_release_top = None
		if self._project_area:
			scram_arch_env_path = os.path.join(self._project_area, '.SCRAM', self._scram_arch, 'Environment')
			self._old_release_top = self._parse_scram_file(scram_arch_env_path).get('RELEASETOP')

		self._update_map_error_code2msg(
			get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

		# Decides between SE transfer (get_se_in_fn_list) and input sandbox (get_sb_in_fpi_list) for the tarball
		self._project_area_tarball_on_se = config.get_bool(['se runtime', 'se project area'], True)
		self._project_area_tarball = config.get_work_path('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - the old unprefixed 'executable' option is rejected with an error
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.get_fn_list('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		# (_events_per_job is only defined for tasks without dataset)
		if not self._has_dataset:
			self._events_per_job = config.get('events per job', '0')
			# this can be a variable like @USER_EVENTS@!
			self._needed_vn_set.add('MAX_EVENTS')
		fragment = config.get_fn('instrumentation fragment',
			get_path_share('fragmentForCMSSW.py', pkg='grid_control_cms'))
		self._config_fn_list = self._process_config_file_list(config,
			config.get_fn_list('config file', self._get_config_file_default()),
			fragment, auto_prepare=config.get_bool('instrumentation', True),
			must_prepare=self._has_dataset)

		# Create project area tarball - a missing tarball switches on the 'sandbox' init state
		if self._project_area and not os.path.exists(self._project_area_tarball):
			config.set_state(True, 'init', detail='sandbox')
		# Information about search order for software environment
		self._cmssw_search_dict = self._get_cmssw_path_list(config)
		if config.get_state('init', detail='sandbox'):
			msg = 'CMSSW tarball already exists! Do you want to regenerate it?'
			# If the user declines, the existing tarball is kept and the 'storage' state below is not touched
			if os.path.exists(self._project_area_tarball) and not self._uii.prompt_bool(msg, True):
				return
			# Generate CMSSW tarball
			if self._project_area:
				create_tarball(_match_files(self._project_area,
					self._project_area_matcher, self._always_matcher.create_matcher(''),
					self._project_area_base_fn), name=self._project_area_tarball)
			if self._project_area_tarball_on_se:
				config.set_state(True, 'init', detail='storage')

	def get_command(self):
		# Command line calling the CMSSW run script with "$@"
		run_script_call = './gc-run.cmssw.sh $@'
		return run_script_call

	def get_job_arguments(self, jobnum):
		# Arguments of the base task followed by the 'arguments' option read in __init__
		base_arguments = SCRAMTask.get_job_arguments(self, jobnum)
		return base_arguments + ' ' + self.arguments

	def get_job_dict(self, jobnum):
		# Get job dependent environment variables
		env = SCRAMTask.get_job_dict(self, jobnum)
		if not self._has_dataset:
			env['MAX_EVENTS'] = self._events_per_job
		env.update(dict(self._cmssw_search_dict))
		# Flag variables are only set (to 'yes') when the corresponding setting is enabled
		flag_list = [
			('GZIP_OUT', self._do_gzip_std_output),
			('SE_RUNTIME', self._project_area_tarball_on_se),
			('HAS_RUNTIME', self._project_area),
		]
		for (flag_name, flag_enabled) in flag_list:
			if flag_enabled:
				env[flag_name] = 'yes'
		env['CMSSW_EXEC'] = 'cmsRun'
		env['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self._config_fn_list))
		env['CMSSW_OLD_RELEASETOP'] = self._old_release_top
		# Prolog / epilog settings are only exported for active executables
		if self.prolog.is_active():
			env.update({
				'CMSSW_PROLOG_EXEC': self.prolog.get_command(),
				'CMSSW_PROLOG_SB_IN_FILES': str.join(' ',
					imap(lambda fpi: fpi.path_rel, self.prolog.get_sb_in_fpi_list())),
				'CMSSW_PROLOG_ARGS': self.prolog.get_arguments(),
			})
		if self.epilog.is_active():
			env.update({
				'CMSSW_EPILOG_EXEC': self.epilog.get_command(),
				'CMSSW_EPILOG_SB_IN_FILES': str.join(' ',
					imap(lambda fpi: fpi.path_rel, self.epilog.get_sb_in_fpi_list())),
				'CMSSW_EPILOG_ARGS': self.epilog.get_arguments(),
			})
		return env

	def get_sb_in_fpi_list(self):
		# Get files for input sandbox
		sb_in_list = SCRAMTask.get_sb_in_fpi_list(self) + self.prolog.get_sb_in_fpi_list()
		sb_in_list = sb_in_list + self.epilog.get_sb_in_fpi_list()
		# Every config file is placed in the sandbox under its base name
		for config_fn in self._config_fn_list:
			config_fpi = Result(path_abs=config_fn, path_rel=os.path.basename(config_fn))
			sb_in_list.append(config_fpi)
		# The tarball only goes into the sandbox if it is not transferred via SE
		if self._project_area and not self._project_area_tarball_on_se:
			tarball_fn = self._project_area_tarball
			sb_in_list.append(Result(path_abs=tarball_fn, path_rel=os.path.basename(tarball_fn)))
		return sb_in_list + [self._script_fpi]

	def get_sb_out_fn_list(self):
		# Get files for output sandbox (cmsRun output files are only added if config files are set)
		sb_out_list = SCRAMTask.get_sb_out_fn_list(self)
		if self._config_fn_list:
			sb_out_list = sb_out_list + self._cmsrun_output_files
		return sb_out_list

	def get_se_in_fn_list(self):
		# Get files to be transferred via SE (description, source, target)
		se_in_list = SCRAMTask.get_se_in_fn_list(self)
		if not (self._project_area and self._project_area_tarball_on_se):
			return se_in_list
		tarball_entry = ('CMSSW tarball', self._project_area_tarball, self._task_id + '.tar.gz')
		return se_in_list + [tarball_entry]

	def _config_find_uninitialized(self, config, config_file_list, auto_prepare, must_prepare):
		# Determine which config files have to be copied to the work directory and / or instrumented.
		# Displays a status table and returns a list of (source, work dir target, do_prepare) tuples
		common_path = os.path.dirname(os.path.commonprefix(config_file_list))

		config_file_list_todo = []
		config_file_status_list = []
		for cfg in config_file_list:
			cfg_new = config.get_work_path(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			if cfg_new_exists:
				# An existing work dir copy is checked instead of the original file
				is_instrumented = self._config_is_instrumented(cfg_new)
				do_copy = False
			else:
				is_instrumented = self._config_is_instrumented(cfg)
				do_copy = True
			do_prepare = (must_prepare or auto_prepare) and not is_instrumented
			do_copy = do_copy or do_prepare
			if do_copy:
				config_file_list_todo.append((cfg, cfg_new, do_prepare))
			# common_path is a prefix of every entry - strip it by slicing, since str.split
			# raises ValueError for an empty separator (no common directory)
			config_file_status_list.append({1: cfg[len(common_path):].lstrip('/'), 2: cfg_new_exists,
				3: is_instrumented, 4: do_prepare})

		if config_file_status_list:
			config_file_status_header = [(1, 'Config file'), (2, 'Work dir'),
				(3, 'Instrumented'), (4, 'Scheduled')]
			ConsoleTable.create(config_file_status_header, config_file_status_list, 'lccc')
		return config_file_list_todo

	def _config_is_instrumented(self, fn):
		# True if the file contains a __VAR__ or @VAR@ placeholder for every needed variable
		content = SafeFile(fn).read_close()
		for var_name in self._needed_vn_set:
			has_placeholder = ('__%s__' % var_name in content) or ('@%s@' % var_name in content)
			if not has_placeholder:
				return False
		return True

	def _config_store_backup(self, source, target, fragment_path=None):
		# Write the content of source to target; the fragment content is appended if a path is given
		output = SafeFile(source).read_close()
		if fragment_path:
			self._log.info('Instrumenting... %s', os.path.basename(source))
			fragment_content = SafeFile(fragment_path).read_close()
			output = output + fragment_content
		SafeFile(target, 'w').write_close(output)

	def _create_datasource(self, config, name, psrc_repository, psrc_list):
		# Let the base class create the data source and register the dataset keys it needs
		psrc_data = SCRAMTask._create_datasource(self, config, name, psrc_repository, psrc_list)
		if psrc_data is None:
			return None
		needed_key_list = psrc_data.get_needed_dataset_keys()
		self._needed_vn_set.update(needed_key_list)
		return psrc_data

	def _get_cmssw_path_list(self, config):
		# Assemble and log the list of (variable name, path) pairs with CMSSW software locations
		path_list = []
		path_cmssw_user = config.get(['cmssw dir', 'vo software dir'], '')
		if path_cmssw_user:
			# use the cleaned absolute path if it exists - otherwise keep the value as given
			path_cmssw_local = os.path.abspath(clean_path(path_cmssw_user))
			if os.path.exists(path_cmssw_local):
				path_cmssw_user = path_cmssw_local
			path_list.append(('CMSSW_DIR_USER', path_cmssw_user))
		if self._old_release_top:
			path_list.append(('CMSSW_DIR_PRO',
				os.path.normpath('%s/../../../../' % self._old_release_top)))
		self._log.info('Local jobs will try to use the CMSSW software located here:')
		idx = 0
		for (_, path) in path_list:
			idx += 1
			self._log.info(' %i) %s', idx, path)
		if path_list:
			self._log.info('')
		return path_list

	def _get_config_file_default(self):
		# Default for the config file list: empty list if a prolog or epilog is active, else unspecified
		has_active_wrapper = self.prolog.is_active() or self.epilog.is_active()
		if not has_active_wrapper:
			return unspecified
		return []

	def _get_var_name_list(self):
		# Variable names of the base class - extended by MAX_EVENTS for tasks without dataset
		var_name_list = SCRAMTask._get_var_name_list(self)
		if self._has_dataset:
			return var_name_list
		var_name_list.append('MAX_EVENTS')
		return var_name_list

	def _process_config_file_list(self, config, config_file_list,
			fragment_path, auto_prepare, must_prepare):
		# Copy (and instrument where scheduled) the config files into the work directory
		todo_list = self._config_find_uninitialized(config,
			config_file_list, auto_prepare, must_prepare)
		for (cfg, cfg_new, do_prepare) in todo_list:
			do_instrument = False
			if do_prepare:
				# the user is only asked if the instrumentation is not done automatically
				do_instrument = auto_prepare or self._uii.prompt_bool(
					'Do you want to prepare %s for running over the dataset?' % cfg, True)
			if do_instrument:
				self._config_store_backup(cfg, cfg_new, fragment_path)
			else:
				self._config_store_backup(cfg, cfg_new)

		# Check the work directory copies and collect their paths
		work_path_list = []
		for cfg in config_file_list:
			cfg_new = config.get_work_path(os.path.basename(cfg))
			if not os.path.exists(cfg_new):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfg)
			if not self._config_is_instrumented(cfg_new):
				if must_prepare:
					raise ConfigError('Config file %r must use %s to work properly!' %
						(cfg, str.join(', ', imap(lambda x: '@%s@' % x, sorted(self._needed_vn_set)))))
				if auto_prepare:
					self._log.warning('Config file %r was not instrumented!', cfg)
			work_path_list.append(cfg_new)
		return work_path_list
Exemplo n.º 11
0
	def __init__(self, config, name):
		# Set up the CMSSW task: preset config options, read the SCRAM settings, collect the
		# CMSSW config files and create the runtime tarball if the sandbox state requires it.
		# Raises ConfigError for inconsistent or unreadable settings.
		config.set('se input timeout', '0:30', override = False)
		config.set('dataset provider', 'DBS3Provider', override = False)
		config.set('dataset splitter', 'EventBoundarySplitter', override = False)
		DataTask.__init__(self, config, name)
		self.errorDict.update(dict(self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))))

		# SCRAM info
		scramProject = config.getList('scram project', [])
		if len(scramProject):
			self.projectArea = config.getPath('project area', '')
			if len(self.projectArea):
				raise ConfigError('Cannot specify both SCRAM project and project area')
			if len(scramProject) != 2:
				raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
		else:
			self.projectArea = config.getPath('project area')

		# This works in tandem with provider_dbsv2.py !
		self.selectedLumis = parseLumiFilter(config.get('lumi filter', ''))

		self.useReqs = config.getBool('software requirements', True, onChange = None)
		self.seRuntime = config.getBool('se runtime', False)
		self.runtimePath = config.getWorkPath('runtime.tar.gz')

		if len(self.projectArea):
			defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
			self.pattern = config.getList('area files', defaultPattern.split())

			if os.path.exists(self.projectArea):
				utils.vprint('Project area found in: %s' % self.projectArea, -1)
			else:
				raise ConfigError('Specified config area %r does not exist!' % self.projectArea)

			scramPath = os.path.join(self.projectArea, '.SCRAM')
			# try to open it - the file handle is closed again as soon as the content is parsed
			try:
				fp = open(os.path.join(scramPath, 'Environment'), 'r')
				try:
					self.scramEnv = utils.DictFormat().parse(fp, keyParser = {None: str})
				finally:
					fp.close()
			except Exception: # do not turn KeyboardInterrupt / SystemExit into a ConfigError
				raise ConfigError('Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)

			for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
				if key not in self.scramEnv:
					raise ConfigError('Installed program in project area not recognized.')

			# list(...) is needed since filter returns an iterator on Python 3, which can't be concatenated
			archs = list(filter(lambda x: os.path.isdir(os.path.join(scramPath, x)) and not x.startswith('.'), os.listdir(scramPath)))
			# the first architecture directory is the default - without one, noDefault is passed as default
			self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
			try:
				fp = open(os.path.join(scramPath, self.scramArch, 'Environment'), 'r')
				try:
					self.scramEnv.update(utils.DictFormat().parse(fp, keyParser = {None: str}))
				finally:
					fp.close()
			except Exception:
				raise ConfigError('Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch)
		else:
			self.scramEnv = {
				'SCRAM_PROJECTNAME': scramProject[0],
				'SCRAM_PROJECTVERSION': scramProject[1]
			}
			self.scramArch = config.get('scram arch')

		self.scramVersion = config.get('scram version', 'scramv1')
		if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
			raise ConfigError('Project area not a valid CMSSW project area.')

		# Information about search order for software environment
		self.searchLoc = []
		if config.getState('sandbox'):
			userPath = config.get('cmssw dir', '')
			if userPath != '':
				self.searchLoc.append(('CMSSW_DIR_USER', userPath))
			if self.scramEnv.get('RELEASETOP', None):
				projPath = os.path.normpath('%s/../../../../' % self.scramEnv['RELEASETOP'])
				self.searchLoc.append(('CMSSW_DIR_PRO', projPath))
		if len(self.searchLoc):
			utils.vprint('Local jobs will try to use the CMSSW software located here:', -1)
			for i, loc in enumerate(self.searchLoc):
				utils.vprint(' %i) %s' % (i + 1, loc[1]), -1)

		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		self.configFiles = []
		cfgDefault = QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			newPath = config.getWorkPath(os.path.basename(cfgFile))
			# an existing copy in the work directory is used as it is
			if not os.path.exists(newPath):
				if not os.path.exists(cfgFile):
					raise ConfigError('Config file %r not found.' % cfgFile)
				shutil.copyfile(cfgFile, newPath)
			self.configFiles.append(newPath)

		# Check that for dataset jobs the necessary placeholders are in the config file
		self.prepare = config.getBool('prepare config', False)
		fragment = config.getPath('instrumentation fragment',
			os.path.join('packages', 'grid_control_cms', 'share', 'fragmentForCMSSW.py'))
		if self.dataSplitter is not None:
			if config.getState('sandbox'):
				if len(self.configFiles) > 0:
					self.instrumentCfgQueue(self.configFiles, fragment, mustPrepare = True)
		else:
			self.eventsPerJob = config.get('events per job', '0')
			if config.getState(detail = 'sandbox') and self.prepare:
				self.instrumentCfgQueue(self.configFiles, fragment)
		# A missing runtime tarball forces the sandbox state
		if not os.path.exists(config.getWorkPath('runtime.tar.gz')):
			config.setState(True, detail = 'sandbox')
		if config.getState(detail = 'sandbox'):
			if os.path.exists(config.getWorkPath('runtime.tar.gz')):
				if not utils.getUserBool('Runtime already exists! Do you want to regenerate CMSSW tarball?', True):
					return
			# Generate runtime tarball (and move to SE)
			if self.projectArea:
				utils.genTarball(config.getWorkPath('runtime.tar.gz'), utils.matchFiles(self.projectArea, self.pattern))
			if self.seRuntime:
				config.setState(True, detail = 'storage')
Exemplo n.º 12
0
class CMSSW(DataTask):
	configSections = DataTask.configSections + ['CMSSW']

	def __init__(self, config, name):
		# Set up the CMSSW task: preset config options, read the SCRAM settings, process the
		# CMSSW config files and create the project area tarball if needed.
		# The config options are set before the DataTask base class is initialized.
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('partition processor', 'CMSPartitionProcessor LocationPartitionProcessor LumiPartitionProcessor')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		DataTask.__init__(self, config, name)
		self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))

		# SCRAM settings
		self._configureSCRAMSettings(config)

		self.useReqs = config.getBool('software requirements', True, onChange = None)
		self._projectAreaTarballSE = config.getBool(['se project area', 'se runtime'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		# Information about search order for software environment
		self.searchLoc = self._getCMSSWPaths(config)
		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		# The plain 'executable' option is rejected - it has to be prefixed with prolog / epilog
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		if self.dataSplitter is None:
			# 'events per job' is only read for tasks without data splitter
			self.eventsPerJob = config.get('events per job', '0')
		fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		# Instrumentation is mandatory (mustPrepare) as soon as a data splitter is present
		self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
			autoPrepare = config.getBool('instrumentation', True),
			mustPrepare = (self.dataSplitter is not None))

		# Create project area tarball
		# A missing tarball forces the 'init' state for 'sandbox'
		if not os.path.exists(self._projectAreaTarball):
			config.setState(True, 'init', detail = 'sandbox')
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				# NOTE: declining the regeneration leaves __init__ early - the 'storage' state below stays untouched
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					return
			# Generate CMSSW tarball
			if self.projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self.projectArea, self.pattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')


	def _configureSCRAMSettings(self, config):
		# Determine the SCRAM settings - either from the 'scram project' option (PROJECT VERSION)
		# or from the .SCRAM directory of the configured project area.
		# Sets self.projectArea, self.pattern (project area only), self.scramEnv,
		# self.scramArch and self.scramVersion.
		# Raises ConfigError for inconsistent or unreadable settings and for non-CMSSW projects.
		scramProject = config.getList('scram project', [])
		if len(scramProject):
			self.projectArea = config.getPath('project area', '')
			if len(self.projectArea):
				raise ConfigError('Cannot specify both SCRAM project and project area')
			if len(scramProject) != 2:
				raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
		else:
			self.projectArea = config.getPath('project area')

		if len(self.projectArea):
			defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
			self.pattern = config.getList('area files', defaultPattern.split())

			if os.path.exists(self.projectArea):
				utils.vprint('Project area found in: %s' % self.projectArea, -1)
			else:
				raise ConfigError('Specified config area %r does not exist!' % self.projectArea)

			scramPath = os.path.join(self.projectArea, '.SCRAM')
			# try to open it - the file handle is closed again as soon as the content is parsed
			try:
				fp = open(os.path.join(scramPath, 'Environment'), 'r')
				try:
					self.scramEnv = utils.DictFormat().parse(fp, keyParser = {None: str})
				finally:
					fp.close()
			except Exception:
				raise ConfigError('Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)

			for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
				if key not in self.scramEnv:
					raise ConfigError('Installed program in project area not recognized.')

			archs = lfilter(lambda x: os.path.isdir(os.path.join(scramPath, x)) and not x.startswith('.'), os.listdir(scramPath))
			# the first architecture directory is the default - without one, noDefault is passed as default
			self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
			try:
				fp = open(os.path.join(scramPath, self.scramArch, 'Environment'), 'r')
				try:
					self.scramEnv.update(utils.DictFormat().parse(fp, keyParser = {None: str}))
				finally:
					fp.close()
			except Exception:
				raise ConfigError('Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch)
		else:
			self.scramEnv = {
				'SCRAM_PROJECTNAME': scramProject[0],
				'SCRAM_PROJECTVERSION': scramProject[1]
			}
			self.scramArch = config.get('scram arch')

		self.scramVersion = config.get('scram version', 'scramv1')
		if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')


	def _getCMSSWPaths(self, config):
		# Collect and display the (variable name, path) pairs with CMSSW software locations.
		# The locations are only determined if the 'init' state for 'sandbox' is set.
		searchLoc = []
		if config.getState('init', detail = 'sandbox'):
			userPath = config.get('cmssw dir', '')
			if userPath != '':
				searchLoc.append(('CMSSW_DIR_USER', userPath))
			releaseTop = self.scramEnv.get('RELEASETOP', None)
			if releaseTop:
				searchLoc.append(('CMSSW_DIR_PRO', os.path.normpath('%s/../../../../' % releaseTop)))
		if not searchLoc:
			return searchLoc
		utils.vprint('Local jobs will try to use the CMSSW software located here:', -1)
		idx = 0
		for (_, path) in searchLoc:
			idx += 1
			utils.vprint(' %i) %s' % (idx, path), -1)
		return searchLoc


	def _getConfigFiles(self, config):
		# Yield the existing CMSSW config files given by the 'config file' option.
		# The option defaults to an empty list if a prolog or epilog is active (else: noDefault).
		hasActiveWrapper = self.prolog.isActive() or self.epilog.isActive()
		cfgDefault = utils.QM(hasActiveWrapper, [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			if os.path.exists(cfgFile):
				yield cfgFile
			else:
				raise ConfigError('Config file %r not found.' % cfgFile)


	def _cfgIsInstrumented(self, fn):
		fp = open(fn, 'r')
		try:
			cfg = fp.read()
		finally:
			fp.close()
		for tag in self.neededVars():
			if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
				return False
		return True


	def _cfgStore(self, source, target, fragment_path = None):
		fp = open(source, 'r')
		try:
			content = fp.read()
		finally:
			fp.close()
		fp = open(target, 'w')
		try:
			fp.write(content)
			if fragment_path:
				logging.getLogger('user').info('Instrumenting... %s', os.path.basename(source))
				fragment_fp = open(fragment_path, 'r')
				fp.write(fragment_fp.read())
				fragment_fp.close()
		finally:
			fp.close()


	def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare, mustPrepare):
		# Determine which config files have to be copied to the work directory and / or
		# instrumented and display a status table for all given config files.
		# Returns a list of (source file, work directory file, doPrepare) tuples.
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))

		cfgTodo = []
		cfgStatus = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			if cfg_new_exists:
				# an existing work directory copy takes precedence over the source file
				isInstrumented = self._cfgIsInstrumented(cfg_new)
				doCopy = False
			else:
				isInstrumented = self._cfgIsInstrumented(cfg)
				doCopy = True
			doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
			doCopy = doCopy or doPrepare
			if doCopy:
				cfgTodo.append((cfg, cfg_new, doPrepare))
			# comPath is always a prefix of cfg - slicing it off also works if it is empty
			# (str.split raises a ValueError for an empty separator)
			cfgDisplay = cfg[len(comPath):].lstrip('/')
			cfgStatus.append({1: cfgDisplay, 2: cfg_new_exists,
				3: isInstrumented, 4: doPrepare})

		utils.vprint('', -1)
		utils.printTabular([(1, 'Config file'), (2, 'Work dir'), (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus, 'lccc')
		utils.vprint('', -1)
		return cfgTodo


	def _processConfigFiles(self, config, cfgFiles, fragment_path, autoPrepare, mustPrepare):
		# Copy (and instrument where scheduled) the config files into the work directory
		for (cfg, cfg_new, doPrepare) in self._cfgFindUninitialized(config, cfgFiles, autoPrepare, mustPrepare):
			doInstrument = False
			if doPrepare:
				# the user is only asked if the instrumentation is not done automatically
				doInstrument = autoPrepare or utils.getUserBool(
					'Do you want to prepare %s for running over the dataset?' % cfg, True)
			if doInstrument:
				self._cfgStore(cfg, cfg_new, fragment_path)
			else:
				self._cfgStore(cfg, cfg_new)

		# Check the work directory copies and collect their paths
		workCfgFiles = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			if not os.path.exists(cfg_new):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfg)
			if not self._cfgIsInstrumented(cfg_new):
				if mustPrepare:
					raise ConfigError('Config file %r must use %s to work properly!' %
						(cfg, str.join(', ', imap(lambda x: '@%s@' % x, self.neededVars()))))
				if autoPrepare:
					self._log.warning('Config file %r was not instrumented!', cfg)
			workCfgFiles.append(cfg_new)
		return workCfgFiles


	def neededVars(self):
		if self.dataSplitter:
			return self._dataPS.getNeededDataKeys()
		return []


	# Called on job submission
	def getSubmitInfo(self, jobNum):
		# Extend the submit info of the base class by the application version and executable;
		# the number of events per job is only added for tasks without data splitter
		cmsswInfo = {'application': self.scramEnv['SCRAM_PROJECTVERSION'], 'exe': 'cmsRun'}
		if self.dataSplitter is None:
			cmsswInfo['nevtJob'] = self.eventsPerJob
		submitInfo = DataTask.getSubmitInfo(self, jobNum)
		submitInfo.update(cmsswInfo)
		return submitInfo


	# Get environment variables for gc_config.sh
	def getTaskConfig(self):
		# Extend the task config of the base class by the CMSSW / SCRAM specific settings
		def _yesNo(value):
			return utils.QM(value, 'yes', 'no')

		def _sbInFileNames(wrapper):
			# space separated relative paths of the sandbox input files of a wrapper
			return str.join(' ', imap(lambda x: x.pathRel, wrapper.getSBInFiles()))

		taskConfig = DataTask.getTaskConfig(self)
		taskConfig.update(dict(self.searchLoc))
		taskConfig['CMSSW_OLD_RELEASETOP'] = self.scramEnv.get('RELEASETOP', None)
		taskConfig['DB_EXEC'] = 'cmsRun'
		taskConfig['SCRAM_ARCH'] = self.scramArch
		taskConfig['SCRAM_VERSION'] = self.scramVersion
		taskConfig['SCRAM_PROJECTVERSION'] = self.scramEnv['SCRAM_PROJECTVERSION']
		taskConfig['GZIP_OUT'] = _yesNo(self.gzipOut)
		taskConfig['SE_RUNTIME'] = _yesNo(self._projectAreaTarballSE)
		taskConfig['HAS_RUNTIME'] = _yesNo(len(self.projectArea))
		taskConfig['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self.configFiles))
		# Prolog / epilog settings are only exported for active wrappers
		if self.prolog.isActive():
			taskConfig['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
			taskConfig['CMSSW_PROLOG_SB_In_FILES'] = _sbInFileNames(self.prolog)
			taskConfig['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
		if self.epilog.isActive():
			taskConfig['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
			taskConfig['CMSSW_EPILOG_SB_In_FILES'] = _sbInFileNames(self.epilog)
			taskConfig['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
		return taskConfig


	# Get job requirements
	def getRequirements(self, jobNum):
		# Requirements of the base class - with the software tag of the SCRAM architecture if enabled
		jobReqs = DataTask.getRequirements(self, jobNum)
		if not self.useReqs:
			return jobReqs
		jobReqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self.scramArch))
		return jobReqs


	# Get files to be transferred via SE (description, source, target)
	def getSEInFiles(self):
		# The CMSSW tarball is only added if a project area is set and the tarball is flagged for SE transfer
		seInFiles = DataTask.getSEInFiles(self)
		if not (len(self.projectArea) and self._projectAreaTarballSE):
			return seInFiles
		tarballEntry = ('CMSSW tarball', self._projectAreaTarball, self.taskID + '.tar.gz')
		return seInFiles + [tarballEntry]


	# Get files for input sandbox
	def getSBInFiles(self):
		# Input sandbox: files of the base class, prolog and epilog, the config files,
		# the project area tarball (if it is not transferred via SE) and the run script
		def _sbEntry(pathAbs, pathRel):
			return utils.Result(pathAbs = pathAbs, pathRel = pathRel)

		sbInFiles = DataTask.getSBInFiles(self) + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
		for cfgFile in self.configFiles:
			sbInFiles.append(_sbEntry(cfgFile, os.path.basename(cfgFile)))
		if len(self.projectArea) and not self._projectAreaTarballSE:
			sbInFiles.append(_sbEntry(self._projectAreaTarball, os.path.basename(self._projectAreaTarball)))
		runScript = _sbEntry(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'), 'gc-run.cmssw.sh')
		return sbInFiles + [runScript]


	# Get files for output sandbox
	def getSBOutFiles(self):
		# Output sandbox: files of the base class, the gzipped log (if enabled) and the dbs tarball
		sbOutFiles = DataTask.getSBOutFiles(self)
		sbOutFiles = sbOutFiles + utils.QM(self.gzipOut, ['cmssw.log.gz'], [])
		return sbOutFiles + ['cmssw.dbs.tar.gz']


	def getCommand(self):
		return './gc-run.cmssw.sh $@'


	def getJobArguments(self, jobNum):
		# Job arguments of the base class followed by the configured 'arguments' option
		baseArguments = DataTask.getJobArguments(self, jobNum)
		return baseArguments + ' ' + self.arguments


	def getVarNames(self):
		# Variable names of the base class - extended by MAX_EVENTS for tasks without data splitter
		varNames = DataTask.getVarNames(self)
		if self.dataSplitter is not None:
			return varNames
		varNames.append('MAX_EVENTS')
		return varNames


	# Get job dependent environment variables
	def getJobConfig(self, jobNum):
		# Job config of the base class - MAX_EVENTS is only set for tasks without data splitter
		jobConfig = DataTask.getJobConfig(self, jobNum)
		if self.dataSplitter is not None:
			return jobConfig
		jobConfig['MAX_EVENTS'] = self.eventsPerJob
		return jobConfig


	def getDescription(self, jobNum): # (task name, job name, type)
		# Description of the base class - an unset job type is replaced by 'analysis'
		description = DataTask.getDescription(self, jobNum)
		if description.jobType:
			return description
		description.jobType = 'analysis'
		return description


	def getDependencies(self):
		# Dependencies of the base class followed by 'cmssw'
		baseDependencies = DataTask.getDependencies(self)
		return baseDependencies + ['cmssw']
Exemplo n.º 13
0
 def __init__(self, config, name):
     # Initialize the DataTask base class first, then create the
     # TaskExecutableWrapper from the same config
     DataTask.__init__(self, config, name)
     self._exeWrap = TaskExecutableWrapper(config)
Exemplo n.º 14
0
    def __init__(self, config, name):
        # Set up the CMSSW task: preset config options, read the SCRAM settings, process the
        # CMSSW config files and create the project area tarball if needed.
        # The config options are set before the DataTask base class is initialized.
        config.set('se input timeout', '0:30')
        config.set('dataset provider', 'DBS3Provider')
        config.set('dataset splitter', 'EventBoundarySplitter')
        config.set(
            'partition processor',
            'CMSPartitionProcessor LocationPartitionProcessor LumiPartitionProcessor'
        )
        config.set('dataset processor', 'LumiDataProcessor', '+=')
        DataTask.__init__(self, config, name)
        self.updateErrorDict(
            utils.pathShare('gc-run.cmssw.sh', pkg='grid_control_cms'))

        # SCRAM settings
        self._configureSCRAMSettings(config)

        self.useReqs = config.getBool('software requirements',
                                      True,
                                      onChange=None)
        self._projectAreaTarballSE = config.getBool(
            ['se project area', 'se runtime'], True)
        self._projectAreaTarball = config.getWorkPath(
            'cmssw-project-area.tar.gz')

        # Information about search order for software environment
        self.searchLoc = self._getCMSSWPaths(config)
        # Prolog / Epilog script support - warn about old syntax
        self.prolog = TaskExecutableWrapper(config, 'prolog', '')
        self.epilog = TaskExecutableWrapper(config, 'epilog', '')
        # The plain 'executable' option is rejected - it has to be prefixed with prolog / epilog
        if config.getPaths('executable', []) != []:
            raise ConfigError(
                'Prefix executable and argument options with either prolog or epilog!'
            )
        self.arguments = config.get('arguments', '')

        # Get cmssw config files and check their existence
        # Check that for dataset jobs the necessary placeholders are in the config file
        if self.dataSplitter is None:
            # 'events per job' is only read for tasks without data splitter
            self.eventsPerJob = config.get('events per job', '0')
        fragment = config.getPath(
            'instrumentation fragment',
            utils.pathShare('fragmentForCMSSW.py', pkg='grid_control_cms'))
        # Instrumentation is mandatory (mustPrepare) as soon as a data splitter is present
        self.configFiles = self._processConfigFiles(
            config,
            list(self._getConfigFiles(config)),
            fragment,
            autoPrepare=config.getBool('instrumentation', True),
            mustPrepare=(self.dataSplitter is not None))

        # Create project area tarball
        # A missing tarball forces the 'init' state for 'sandbox'
        if not os.path.exists(self._projectAreaTarball):
            config.setState(True, 'init', detail='sandbox')
        if config.getState('init', detail='sandbox'):
            if os.path.exists(self._projectAreaTarball):
                # NOTE: declining the regeneration leaves __init__ early - the 'storage' state below stays untouched
                if not utils.getUserBool(
                        'CMSSW tarball already exists! Do you want to regenerate it?',
                        True):
                    return
            # Generate CMSSW tarball
            if self.projectArea:
                utils.genTarball(
                    self._projectAreaTarball,
                    utils.matchFiles(self.projectArea, self.pattern))
            if self._projectAreaTarballSE:
                config.setState(True, 'init', detail='storage')
Exemplo n.º 15
0
class CMSSW(DataTask):
    configSections = DataTask.configSections + ['CMSSW']

    def __init__(self, config, name):
        # Set up the CMSSW task: preset config options, read the SCRAM settings, process the
        # CMSSW config files and create the project area tarball if needed.
        # The config options are set before the DataTask base class is initialized.
        config.set('se input timeout', '0:30')
        config.set('dataset provider', 'DBS3Provider')
        config.set('dataset splitter', 'EventBoundarySplitter')
        config.set(
            'partition processor',
            'CMSPartitionProcessor LocationPartitionProcessor LumiPartitionProcessor'
        )
        config.set('dataset processor', 'LumiDataProcessor', '+=')
        DataTask.__init__(self, config, name)
        self.updateErrorDict(
            utils.pathShare('gc-run.cmssw.sh', pkg='grid_control_cms'))

        # SCRAM settings
        self._configureSCRAMSettings(config)

        self.useReqs = config.getBool('software requirements',
                                      True,
                                      onChange=None)
        self._projectAreaTarballSE = config.getBool(
            ['se project area', 'se runtime'], True)
        self._projectAreaTarball = config.getWorkPath(
            'cmssw-project-area.tar.gz')

        # Information about search order for software environment
        self.searchLoc = self._getCMSSWPaths(config)
        # Prolog / Epilog script support - warn about old syntax
        self.prolog = TaskExecutableWrapper(config, 'prolog', '')
        self.epilog = TaskExecutableWrapper(config, 'epilog', '')
        # The plain 'executable' option is rejected - it has to be prefixed with prolog / epilog
        if config.getPaths('executable', []) != []:
            raise ConfigError(
                'Prefix executable and argument options with either prolog or epilog!'
            )
        self.arguments = config.get('arguments', '')

        # Get cmssw config files and check their existence
        # Check that for dataset jobs the necessary placeholders are in the config file
        if self.dataSplitter is None:
            # 'events per job' is only read for tasks without data splitter
            self.eventsPerJob = config.get('events per job', '0')
        fragment = config.getPath(
            'instrumentation fragment',
            utils.pathShare('fragmentForCMSSW.py', pkg='grid_control_cms'))
        # Instrumentation is mandatory (mustPrepare) as soon as a data splitter is present
        self.configFiles = self._processConfigFiles(
            config,
            list(self._getConfigFiles(config)),
            fragment,
            autoPrepare=config.getBool('instrumentation', True),
            mustPrepare=(self.dataSplitter is not None))

        # Create project area tarball
        # A missing tarball forces the 'init' state for 'sandbox'
        if not os.path.exists(self._projectAreaTarball):
            config.setState(True, 'init', detail='sandbox')
        if config.getState('init', detail='sandbox'):
            if os.path.exists(self._projectAreaTarball):
                # NOTE: declining the regeneration leaves __init__ early - the 'storage' state below stays untouched
                if not utils.getUserBool(
                        'CMSSW tarball already exists! Do you want to regenerate it?',
                        True):
                    return
            # Generate CMSSW tarball
            if self.projectArea:
                utils.genTarball(
                    self._projectAreaTarball,
                    utils.matchFiles(self.projectArea, self.pattern))
            if self._projectAreaTarballSE:
                config.setState(True, 'init', detail='storage')

    def _configureSCRAMSettings(self, config):
        # Determine the SCRAM settings - either from the 'scram project' option (PROJECT VERSION)
        # or from the .SCRAM directory of the configured project area.
        # Sets self.projectArea, self.pattern (project area only), self.scramEnv,
        # self.scramArch and self.scramVersion.
        # Raises ConfigError for inconsistent or unreadable settings and for non-CMSSW projects.
        scramProject = config.getList('scram project', [])
        if len(scramProject):
            self.projectArea = config.getPath('project area', '')
            if len(self.projectArea):
                raise ConfigError(
                    'Cannot specify both SCRAM project and project area')
            if len(scramProject) != 2:
                raise ConfigError(
                    'SCRAM project needs exactly 2 arguments: PROJECT VERSION')
        else:
            self.projectArea = config.getPath('project area')

        if len(self.projectArea):
            defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
            self.pattern = config.getList('area files', defaultPattern.split())

            if os.path.exists(self.projectArea):
                utils.vprint('Project area found in: %s' % self.projectArea,
                             -1)
            else:
                raise ConfigError('Specified config area %r does not exist!' %
                                  self.projectArea)

            scramPath = os.path.join(self.projectArea, '.SCRAM')
            # try to open it - the file handle is closed again as soon as the content is parsed
            try:
                fp = open(os.path.join(scramPath, 'Environment'), 'r')
                try:
                    self.scramEnv = utils.DictFormat().parse(
                        fp, keyParser={None: str})
                finally:
                    fp.close()
            except Exception:
                raise ConfigError(
                    'Project area file %s/.SCRAM/Environment cannot be parsed!'
                    % self.projectArea)

            for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
                if key not in self.scramEnv:
                    raise ConfigError(
                        'Installed program in project area not recognized.')

            archs = lfilter(
                lambda x: os.path.isdir(os.path.join(scramPath, x)) and not x.
                startswith('.'), os.listdir(scramPath))
            # the first architecture directory is the default - without one, noDefault is passed as default
            self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
            try:
                fp = open(
                    os.path.join(scramPath, self.scramArch, 'Environment'),
                    'r')
                try:
                    self.scramEnv.update(utils.DictFormat().parse(
                        fp, keyParser={None: str}))
                finally:
                    fp.close()
            except Exception:
                raise ConfigError(
                    'Project area file .SCRAM/%s/Environment cannot be parsed!'
                    % self.scramArch)
        else:
            self.scramEnv = {
                'SCRAM_PROJECTNAME': scramProject[0],
                'SCRAM_PROJECTVERSION': scramProject[1]
            }
            self.scramArch = config.get('scram arch')

        self.scramVersion = config.get('scram version', 'scramv1')
        if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
            raise ConfigError('Project area contains no CMSSW project')

    def _getCMSSWPaths(self, config):
        # Collect and display the (variable name, path) pairs with CMSSW software locations.
        # The locations are only determined if the 'init' state for 'sandbox' is set.
        searchLoc = []
        if config.getState('init', detail='sandbox'):
            userPath = config.get('cmssw dir', '')
            if userPath != '':
                searchLoc.append(('CMSSW_DIR_USER', userPath))
            releaseTop = self.scramEnv.get('RELEASETOP', None)
            if releaseTop:
                searchLoc.append(
                    ('CMSSW_DIR_PRO',
                     os.path.normpath('%s/../../../../' % releaseTop)))
        if not searchLoc:
            return searchLoc
        utils.vprint(
            'Local jobs will try to use the CMSSW software located here:', -1)
        idx = 0
        for (_, path) in searchLoc:
            idx += 1
            utils.vprint(' %i) %s' % (idx, path), -1)
        return searchLoc

    def _getConfigFiles(self, config):
        # Yield the existing CMSSW config files given by the 'config file' option.
        # The option defaults to an empty list if a prolog or epilog is active (else: noDefault).
        hasActiveWrapper = self.prolog.isActive() or self.epilog.isActive()
        cfgDefault = utils.QM(hasActiveWrapper, [], noDefault)
        cfgFiles = config.getPaths('config file', cfgDefault, mustExist=False)
        for cfgFile in cfgFiles:
            if os.path.exists(cfgFile):
                yield cfgFile
            else:
                raise ConfigError('Config file %r not found.' % cfgFile)

    def _cfgIsInstrumented(self, fn):
        fp = open(fn, 'r')
        try:
            cfg = fp.read()
        finally:
            fp.close()
        for tag in self.neededVars():
            if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
                return False
        return True

    def _cfgStore(self, source, target, fragment_path=None):
        fp = open(source, 'r')
        try:
            content = fp.read()
        finally:
            fp.close()
        fp = open(target, 'w')
        try:
            fp.write(content)
            if fragment_path:
                logging.getLogger('user').info('Instrumenting... %s',
                                               os.path.basename(source))
                fragment_fp = open(fragment_path, 'r')
                fp.write(fragment_fp.read())
                fragment_fp.close()
        finally:
            fp.close()

    def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare,
                              mustPrepare):
        # Determine which config files have to be copied to the work directory and / or
        # instrumented and display a status table for all given config files.
        # Returns a list of (source file, work directory file, doPrepare) tuples.
        comPath = os.path.dirname(os.path.commonprefix(cfgFiles))

        cfgTodo = []
        cfgStatus = []
        for cfg in cfgFiles:
            cfg_new = config.getWorkPath(os.path.basename(cfg))
            cfg_new_exists = os.path.exists(cfg_new)
            if cfg_new_exists:
                # an existing work directory copy takes precedence over the source file
                isInstrumented = self._cfgIsInstrumented(cfg_new)
                doCopy = False
            else:
                isInstrumented = self._cfgIsInstrumented(cfg)
                doCopy = True
            doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
            doCopy = doCopy or doPrepare
            if doCopy:
                cfgTodo.append((cfg, cfg_new, doPrepare))
            # comPath is always a prefix of cfg - slicing it off also works if it is empty
            # (str.split raises a ValueError for an empty separator)
            cfgStatus.append({
                1: cfg[len(comPath):].lstrip('/'),
                2: cfg_new_exists,
                3: isInstrumented,
                4: doPrepare
            })

        utils.vprint('', -1)
        utils.printTabular([(1, 'Config file'), (2, 'Work dir'),
                            (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus,
                           'lccc')
        utils.vprint('', -1)
        return cfgTodo

    def _processConfigFiles(self, config, cfgFiles, fragment_path, autoPrepare,
                            mustPrepare):
        """Copy the config files into the work directory and verify them.

        Files reported by ``_cfgFindUninitialized`` are written to the work
        directory, with the fragment appended when preparation is scheduled
        and either *autoPrepare* is set or the user agrees.

        :returns: list of the work directory paths, in *cfgFiles* order
        :raises ConfigError: if a work directory copy is missing, or if
            *mustPrepare* is set and a copy is not instrumented
        """
        pending = self._cfgFindUninitialized(config, cfgFiles, autoPrepare,
                                             mustPrepare)
        for (src, dst, wants_prepare) in pending:
            instrument = False
            if wants_prepare:
                # the user is only asked when automatic preparation is off
                instrument = autoPrepare or utils.getUserBool(
                    'Do you want to prepare %s for running over the dataset?' %
                    src, True)
            if instrument:
                self._cfgStore(src, dst, fragment_path)
            else:
                self._cfgStore(src, dst)

        work_copies = []
        for src in cfgFiles:
            dst = config.getWorkPath(os.path.basename(src))
            if not os.path.exists(dst):
                raise ConfigError(
                    'Config file %r was not copied to the work directory!' %
                    src)
            if not self._cfgIsInstrumented(dst):
                if mustPrepare:
                    placeholders = str.join(
                        ', ', imap(lambda x: '@%s@' % x, self.neededVars()))
                    raise ConfigError(
                        'Config file %r must use %s to work properly!' %
                        (src, placeholders))
                if autoPrepare:
                    self._log.warning('Config file %r was not instrumented!',
                                      src)
            work_copies.append(dst)
        return work_copies

    def neededVars(self):
        """Return the keys reported by ``self._dataPS.getNeededDataKeys()``.

        An empty list is returned when no data splitter is set.
        """
        if not self.dataSplitter:
            return []
        return self._dataPS.getNeededDataKeys()

    # Called on job submission
    def getSubmitInfo(self, jobNum):
        """Return the base submit info extended with CMSSW details.

        Adds the SCRAM project version as 'application' and 'cmsRun' as
        'exe'; without a data splitter the events per job are added too.
        """
        info = DataTask.getSubmitInfo(self, jobNum)
        extra = {
            'application': self.scramEnv['SCRAM_PROJECTVERSION'],
            'exe': 'cmsRun'
        }
        if self.dataSplitter is None:
            extra['nevtJob'] = self.eventsPerJob
        info.update(extra)
        return info

    # Get environment variables for gc_config.sh
    def getTaskConfig(self):
        """Return the base task config extended with the CMSSW settings.

        Prolog / epilog entries are only added for active wrappers.
        """
        env = DataTask.getTaskConfig(self)
        env.update(dict(self.searchLoc))
        scram_env = self.scramEnv
        env['CMSSW_OLD_RELEASETOP'] = scram_env.get('RELEASETOP', None)
        env['DB_EXEC'] = 'cmsRun'
        env['SCRAM_ARCH'] = self.scramArch
        env['SCRAM_VERSION'] = self.scramVersion
        env['SCRAM_PROJECTVERSION'] = scram_env['SCRAM_PROJECTVERSION']
        env['GZIP_OUT'] = utils.QM(self.gzipOut, 'yes', 'no')
        env['SE_RUNTIME'] = utils.QM(self._projectAreaTarballSE, 'yes', 'no')
        env['HAS_RUNTIME'] = utils.QM(len(self.projectArea), 'yes', 'no')
        config_names = [os.path.basename(fn) for fn in self.configFiles]
        env['CMSSW_CONFIG'] = str.join(' ', config_names)

        prolog = self.prolog
        if prolog.isActive():
            env['CMSSW_PROLOG_EXEC'] = prolog.getCommand()
            prolog_files = [fpi.pathRel for fpi in prolog.getSBInFiles()]
            env['CMSSW_PROLOG_SB_In_FILES'] = str.join(' ', prolog_files)
            env['CMSSW_PROLOG_ARGS'] = prolog.getArguments()

        epilog = self.epilog
        if epilog.isActive():
            env['CMSSW_EPILOG_EXEC'] = epilog.getCommand()
            epilog_files = [fpi.pathRel for fpi in epilog.getSBInFiles()]
            env['CMSSW_EPILOG_SB_In_FILES'] = str.join(' ', epilog_files)
            env['CMSSW_EPILOG_ARGS'] = epilog.getArguments()
        return env

    # Get job requirements
    def getRequirements(self, jobNum):
        """Return the base requirements, plus a software tag if enabled.

        With ``self.useReqs`` set, a ``WMS.SOFTWARE`` requirement named after
        the SCRAM architecture is appended.
        """
        requirements = DataTask.getRequirements(self, jobNum)
        if not self.useReqs:
            return requirements
        software_tag = 'VO-cms-%s' % self.scramArch
        requirements.append((WMS.SOFTWARE, software_tag))
        return requirements

    # Get files to be transferred via SE (description, source, target)
    def getSEInFiles(self):
        """Return the base SE input files, plus the tarball when SE-hosted.

        The project area tarball is added only if a project area is set and
        ``self._projectAreaTarballSE`` is enabled.
        """
        se_files = DataTask.getSEInFiles(self)
        if not (len(self.projectArea) and self._projectAreaTarballSE):
            return se_files
        tarball_entry = ('CMSSW tarball', self._projectAreaTarball,
                         self.taskID + '.tar.gz')
        return se_files + [tarball_entry]

    # Get files for input sandbox
    def getSBInFiles(self):
        """Return all files of the input sandbox.

        Base task, prolog and epilog files come first, followed by the
        config files, the project area tarball (only when it is not
        transferred via SE) and finally the gc-run.cmssw.sh script.
        """
        sandbox = (DataTask.getSBInFiles(self) + self.prolog.getSBInFiles() +
                   self.epilog.getSBInFiles())
        sandbox.extend([
            utils.Result(pathAbs=path, pathRel=os.path.basename(path))
            for path in self.configFiles
        ])
        if len(self.projectArea) and not self._projectAreaTarballSE:
            tarball = self._projectAreaTarball
            sandbox.append(
                utils.Result(pathAbs=tarball,
                             pathRel=os.path.basename(tarball)))
        script = utils.pathShare('gc-run.cmssw.sh', pkg='grid_control_cms')
        return sandbox + [
            utils.Result(pathAbs=script, pathRel='gc-run.cmssw.sh')
        ]

    # Get files for output sandbox
    def getSBOutFiles(self):
        """Return the base output files plus the CMSSW specific ones.

        'cmssw.log.gz' is only included when ``self.gzipOut`` is set;
        'cmssw.dbs.tar.gz' is always included.
        """
        base_files = DataTask.getSBOutFiles(self)
        log_files = utils.QM(self.gzipOut, ['cmssw.log.gz'], [])
        return base_files + log_files + ['cmssw.dbs.tar.gz']

    def getCommand(self):
        """Return the command line invoking the gc-run.cmssw.sh script."""
        run_script_cmd = './gc-run.cmssw.sh $@'
        return run_script_cmd

    def getJobArguments(self, jobNum):
        """Return the base job arguments followed by ``self.arguments``."""
        base_args = DataTask.getJobArguments(self, jobNum)
        return base_args + ' ' + self.arguments

    def getVarNames(self):
        """Return the base variable names; adds MAX_EVENTS without splitter."""
        names = DataTask.getVarNames(self)
        if self.dataSplitter is not None:
            return names
        names.append('MAX_EVENTS')
        return names

    # Get job dependent environment variables
    def getJobConfig(self, jobNum):
        """Return the base job config; adds MAX_EVENTS without splitter."""
        job_env = DataTask.getJobConfig(self, jobNum)
        if self.dataSplitter is not None:
            return job_env
        job_env['MAX_EVENTS'] = self.eventsPerJob
        return job_env

    def getDescription(self, jobNum):  # (task name, job name, type)
        """Return the base description, defaulting its job type to 'analysis'."""
        description = DataTask.getDescription(self, jobNum)
        if description.jobType:
            return description
        description.jobType = 'analysis'
        return description

    def getDependencies(self):
        """Return the base dependencies extended by 'cmssw'."""
        base_deps = DataTask.getDependencies(self)
        return base_deps + ['cmssw']
Exemplo n.º 16
0
class CMSSW(SCRAMTask):
    """Task that runs cmsRun through ``gc-run.cmssw.sh`` in a CMSSW project.

    The constructor presets dataset related config options, copies the CMSSW
    config files into the work directory (instrumenting them on request) and
    creates the project area tarball when the sandbox has to be initialised.
    """
    alias_list = ['']
    config_section_list = SCRAMTask.config_section_list + ['CMSSW']

    def __init__(self, config, name):
        """Set up the task from *config*.

        :raises ConfigError: if the SCRAM project is not 'CMSSW', if a plain
            'executable' option is set, or if config file processing fails
        """
        # These options are set before the base class constructor runs
        config.set('se input timeout', '0:30')
        config.set('application', 'cmsRun', section='dashboard')
        config.set('dataset provider', 'DBS3Provider')
        config.set('dataset splitter', 'EventBoundarySplitter')
        config.set('dataset processor', 'LumiDataProcessor', '+=')
        config.set('partition processor', 'BasicPartitionProcessor', '-=')
        config.set(
            'partition processor',
            'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor',
            '+=')

        # Filled by _create_datasource and below; has to exist before the
        # base class constructor is called
        self._needed_vn_set = set()
        SCRAMTask.__init__(self, config, name)
        self._uii = UserInputInterface()

        # Set up file path information
        self._cmsrun_output_files = ['cmssw.dbs.tar.gz']
        if self._do_gzip_std_output:
            self._cmsrun_output_files.append('cmssw.log.gz')
        self._script_fpi = Result(path_rel='gc-run.cmssw.sh',
                                  path_abs=get_path_share(
                                      'gc-run.cmssw.sh',
                                      pkg='grid_control_cms'))

        if self._scram_project != 'CMSSW':
            raise ConfigError('Project area contains no CMSSW project')

        self._old_release_top = None
        if self._project_area:
            scram_arch_env_path = os.path.join(self._project_area, '.SCRAM',
                                               self._scram_arch, 'Environment')
            self._old_release_top = self._parse_scram_file(
                scram_arch_env_path).get('RELEASETOP')

        self._update_map_error_code2msg(
            get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

        self._project_area_tarball_on_se = config.get_bool(
            ['se runtime', 'se project area'], True)
        self._project_area_tarball = config.get_work_path(
            'cmssw-project-area.tar.gz')

        # Prolog / Epilog script support - warn about old syntax
        self.prolog = TaskExecutableWrapper(config, 'prolog', '')
        self.epilog = TaskExecutableWrapper(config, 'epilog', '')
        if config.get_fn_list('executable', []) != []:
            raise ConfigError(
                'Prefix executable and argument options with either prolog or epilog!'
            )
        self.arguments = config.get('arguments', '')

        # Get cmssw config files and check their existence
        # Check that for dataset jobs the necessary placeholders are in the config file
        if not self._has_dataset:
            self._events_per_job = config.get('events per job', '0')
            # this can be a variable like @USER_EVENTS@!
            self._needed_vn_set.add('MAX_EVENTS')
        fragment = config.get_fn(
            'instrumentation fragment',
            get_path_share('fragmentForCMSSW.py', pkg='grid_control_cms'))
        self._config_fn_list = self._process_config_file_list(
            config,
            config.get_fn_list('config file', self._get_config_file_default()),
            fragment,
            auto_prepare=config.get_bool('instrumentation', True),
            must_prepare=self._has_dataset)

        # Create project area tarball
        if self._project_area and not os.path.exists(
                self._project_area_tarball):
            config.set_state(True, 'init', detail='sandbox')
        # Information about search order for software environment
        self._cmssw_search_dict = self._get_cmssw_path_list(config)
        if config.get_state('init', detail='sandbox'):
            msg = 'CMSSW tarball already exists! Do you want to regenerate it?'
            if os.path.exists(
                    self._project_area_tarball) and not self._uii.prompt_bool(
                        msg, True):
                # keep the existing tarball; the storage state is not touched
                return
            # Generate CMSSW tarball
            if self._project_area:
                create_tarball(_match_files(
                    self._project_area, self._project_area_matcher,
                    self._always_matcher.create_matcher(''),
                    self._project_area_base_fn),
                               name=self._project_area_tarball)
            if self._project_area_tarball_on_se:
                config.set_state(True, 'init', detail='storage')

    def get_command(self):
        """Return the command line invoking the gc-run.cmssw.sh script."""
        return './gc-run.cmssw.sh $@'

    def get_job_arguments(self, jobnum):
        """Return the base job arguments followed by ``self.arguments``."""
        return SCRAMTask.get_job_arguments(self, jobnum) + ' ' + self.arguments

    def get_job_dict(self, jobnum):
        """Return the job dependent environment variables for *jobnum*."""
        # Get job dependent environment variables
        job_env_dict = SCRAMTask.get_job_dict(self, jobnum)
        if not self._has_dataset:
            job_env_dict['MAX_EVENTS'] = self._events_per_job
        job_env_dict.update(dict(self._cmssw_search_dict))
        if self._do_gzip_std_output:
            job_env_dict['GZIP_OUT'] = 'yes'
        if self._project_area_tarball_on_se:
            job_env_dict['SE_RUNTIME'] = 'yes'
        if self._project_area:
            job_env_dict['HAS_RUNTIME'] = 'yes'
        job_env_dict['CMSSW_EXEC'] = 'cmsRun'
        job_env_dict['CMSSW_CONFIG'] = str.join(
            ' ', imap(os.path.basename, self._config_fn_list))
        job_env_dict['CMSSW_OLD_RELEASETOP'] = self._old_release_top
        if self.prolog.is_active():
            job_env_dict['CMSSW_PROLOG_EXEC'] = self.prolog.get_command()
            job_env_dict['CMSSW_PROLOG_SB_IN_FILES'] = str.join(
                ' ',
                imap(lambda x: x.path_rel, self.prolog.get_sb_in_fpi_list()))
            job_env_dict['CMSSW_PROLOG_ARGS'] = self.prolog.get_arguments()
        if self.epilog.is_active():
            job_env_dict['CMSSW_EPILOG_EXEC'] = self.epilog.get_command()
            job_env_dict['CMSSW_EPILOG_SB_IN_FILES'] = str.join(
                ' ',
                imap(lambda x: x.path_rel, self.epilog.get_sb_in_fpi_list()))
            job_env_dict['CMSSW_EPILOG_ARGS'] = self.epilog.get_arguments()
        return job_env_dict

    def get_sb_in_fpi_list(self):
        """Return the file path infos of all input sandbox files."""
        # Get files for input sandbox
        fpi_list = (SCRAMTask.get_sb_in_fpi_list(self) +
                    self.prolog.get_sb_in_fpi_list() +
                    self.epilog.get_sb_in_fpi_list())
        for config_file in self._config_fn_list:
            fpi_list.append(
                Result(path_abs=config_file,
                       path_rel=os.path.basename(config_file)))
        # the tarball travels in the sandbox only if it is not sent via SE
        if self._project_area and not self._project_area_tarball_on_se:
            fpi_list.append(
                Result(path_abs=self._project_area_tarball,
                       path_rel=os.path.basename(self._project_area_tarball)))
        return fpi_list + [self._script_fpi]

    def get_sb_out_fn_list(self):
        """Return the output sandbox file names.

        The cmsRun output files are only added if config files are set.
        """
        # Get files for output sandbox
        if not self._config_fn_list:
            return SCRAMTask.get_sb_out_fn_list(self)
        return SCRAMTask.get_sb_out_fn_list(self) + self._cmsrun_output_files

    def get_se_in_fn_list(self):
        """Return the (description, source, target) files transferred via SE."""
        # Get files to be transferred via SE (description, source, target)
        files = SCRAMTask.get_se_in_fn_list(self)
        if self._project_area and self._project_area_tarball_on_se:
            return files + [('CMSSW tarball', self._project_area_tarball,
                             self._task_id + '.tar.gz')]
        return files

    def _config_find_uninitialized(self, config, config_file_list,
                                   auto_prepare, must_prepare):
        """Work out which config files still have to be copied or prepared.

        For every entry the work directory copy (same base name) is
        inspected if it exists, otherwise the original file. A status table
        is shown when there is at least one config file.

        :returns: list of ``(cfg, cfg_new, do_prepare)`` tuples for all
            files that need to be (re)written to the work directory
        """
        common_path = os.path.dirname(os.path.commonprefix(config_file_list))

        config_file_list_todo = []
        config_file_status_list = []
        for cfg in config_file_list:
            cfg_new = config.get_work_path(os.path.basename(cfg))
            cfg_new_exists = os.path.exists(cfg_new)
            if cfg_new_exists:
                is_instrumented = self._config_is_instrumented(cfg_new)
                do_copy = False
            else:
                is_instrumented = self._config_is_instrumented(cfg)
                do_copy = True
            do_prepare = (must_prepare or auto_prepare) and not is_instrumented
            do_copy = do_copy or do_prepare
            if do_copy:
                config_file_list_todo.append((cfg, cfg_new, do_prepare))
            config_file_status_list.append({
                # common_path is a prefix of every entry; slicing it off
                # (unlike str.split) also works for an empty common_path
                1: cfg[len(common_path):].lstrip('/'),
                2: cfg_new_exists,
                3: is_instrumented,
                4: do_prepare
            })

        if config_file_status_list:
            config_file_status_header = [(1, 'Config file'), (2, 'Work dir'),
                                         (3, 'Instrumented'), (4, 'Scheduled')]
            ConsoleTable.create(config_file_status_header,
                                config_file_status_list, 'lccc')
        return config_file_list_todo

    def _config_is_instrumented(self, fn):
        """Return True if file *fn* references every needed variable.

        A variable counts as referenced when the file text contains either
        ``__NAME__`` or ``@NAME@`` for it.
        """
        cfg = SafeFile(fn).read_close()
        for tag in self._needed_vn_set:
            if ('__%s__' % tag not in cfg) and ('@%s@' % tag not in cfg):
                return False
        return True

    def _config_store_backup(self, source, target, fragment_path=None):
        """Write *source* to *target*, appending the fragment if given."""
        content = SafeFile(source).read_close()
        if fragment_path:
            self._log.info('Instrumenting... %s', os.path.basename(source))
            content += SafeFile(fragment_path).read_close()
        SafeFile(target, 'w').write_close(content)

    def _create_datasource(self, config, name, psrc_repository, psrc_list):
        """Create the data source and record the dataset keys it needs."""
        psrc_data = SCRAMTask._create_datasource(self, config, name,
                                                 psrc_repository, psrc_list)
        if psrc_data is not None:
            self._needed_vn_set.update(psrc_data.get_needed_dataset_keys())
        return psrc_data

    def _get_cmssw_path_list(self, config):
        """Return and log the (variable name, path) CMSSW search locations."""
        result = []
        path_cmssw_user = config.get(['cmssw dir', 'vo software dir'], '')
        if path_cmssw_user:
            # prefer the resolved local path, but only if it exists
            path_cmssw_local = os.path.abspath(clean_path(path_cmssw_user))
            if os.path.exists(path_cmssw_local):
                path_cmssw_user = path_cmssw_local
        if path_cmssw_user:
            result.append(('CMSSW_DIR_USER', path_cmssw_user))
        if self._old_release_top:
            path_scram_project = os.path.normpath('%s/../../../../' %
                                                  self._old_release_top)
            result.append(('CMSSW_DIR_PRO', path_scram_project))
        self._log.info(
            'Local jobs will try to use the CMSSW software located here:')
        for idx, loc in enumerate(result):
            self._log.info(' %i) %s', idx + 1, loc[1])
        if result:
            self._log.info('')
        return result

    def _get_config_file_default(self):
        """Return the default for the 'config file' option.

        An empty list with an active prolog or epilog, else ``unspecified``.
        """
        if self.prolog.is_active() or self.epilog.is_active():
            return []
        return unspecified

    def _get_var_name_list(self):
        """Return the base variable names; adds MAX_EVENTS without dataset."""
        result = SCRAMTask._get_var_name_list(self)
        if not self._has_dataset:
            result.append('MAX_EVENTS')
        return result

    def _process_config_file_list(self, config, config_file_list,
                                  fragment_path, auto_prepare, must_prepare):
        """Copy the config files into the work directory and verify them.

        :returns: list of the work directory paths, in input order
        :raises ConfigError: if a work directory copy is missing, or if
            *must_prepare* is set and a copy is not instrumented
        """
        # process list of uninitialized config files
        iter_uninitialized_config_files = self._config_find_uninitialized(
            config, config_file_list, auto_prepare, must_prepare)
        for (cfg, cfg_new, do_prepare) in iter_uninitialized_config_files:
            ask_user_msg = 'Do you want to prepare %s for running over the dataset?' % cfg
            if do_prepare and (auto_prepare
                               or self._uii.prompt_bool(ask_user_msg, True)):
                self._config_store_backup(cfg, cfg_new, fragment_path)
            else:
                self._config_store_backup(cfg, cfg_new)

        result = []
        for cfg in config_file_list:
            cfg_new = config.get_work_path(os.path.basename(cfg))
            if not os.path.exists(cfg_new):
                raise ConfigError(
                    'Config file %r was not copied to the work directory!' %
                    cfg)
            is_instrumented = self._config_is_instrumented(cfg_new)
            if must_prepare and not is_instrumented:
                raise ConfigError(
                    'Config file %r must use %s to work properly!' %
                    (cfg,
                     str.join(
                         ', ',
                         imap(lambda x: '@%s@' % x, sorted(
                             self._needed_vn_set)))))
            if auto_prepare and not is_instrumented:
                self._log.warning('Config file %r was not instrumented!', cfg)
            result.append(cfg_new)
        return result
Exemplo n.º 17
0
    def __init__(self, config, name):
        """Set up the CMSSW task from *config*.

        Presets several config options, initialises the SCRAM base task,
        processes the CMSSW config files and - when the sandbox has to be
        initialised - creates the project area tarball.

        Raises ConfigError if the SCRAM project is not 'CMSSW' or if a plain
        'executable' option is set.
        """
        # These options are set before the base class constructor runs.
        # NOTE(review): '+=' / '-=' presumably append to / remove from the
        # existing option value - confirm against config.set
        config.set('se input timeout', '0:30')
        config.set('application', 'cmsRun', section='dashboard')
        config.set('dataset provider', 'DBS3Provider')
        config.set('dataset splitter', 'EventBoundarySplitter')
        config.set('dataset processor', 'LumiDataProcessor', '+=')
        config.set('partition processor', 'BasicPartitionProcessor', '-=')
        config.set(
            'partition processor',
            'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor',
            '+=')

        # Names of the variables the config files have to reference
        self._needed_vn_set = set()
        SCRAMTask.__init__(self, config, name)
        self._uii = UserInputInterface()

        # Set up file path information
        self._cmsrun_output_files = ['cmssw.dbs.tar.gz']
        if self._do_gzip_std_output:
            self._cmsrun_output_files.append('cmssw.log.gz')
        self._script_fpi = Result(path_rel='gc-run.cmssw.sh',
                                  path_abs=get_path_share(
                                      'gc-run.cmssw.sh',
                                      pkg='grid_control_cms'))

        if self._scram_project != 'CMSSW':
            raise ConfigError('Project area contains no CMSSW project')

        # RELEASETOP is read from the arch specific SCRAM environment file
        # of the project area (stays None without a project area)
        self._old_release_top = None
        if self._project_area:
            scram_arch_env_path = os.path.join(self._project_area, '.SCRAM',
                                               self._scram_arch, 'Environment')
            self._old_release_top = self._parse_scram_file(
                scram_arch_env_path).get('RELEASETOP')

        self._update_map_error_code2msg(
            get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

        self._project_area_tarball_on_se = config.get_bool(
            ['se runtime', 'se project area'], True)
        self._project_area_tarball = config.get_work_path(
            'cmssw-project-area.tar.gz')

        # Prolog / Epilog script support - warn about old syntax
        self.prolog = TaskExecutableWrapper(config, 'prolog', '')
        self.epilog = TaskExecutableWrapper(config, 'epilog', '')
        if config.get_fn_list('executable', []) != []:
            raise ConfigError(
                'Prefix executable and argument options with either prolog or epilog!'
            )
        self.arguments = config.get('arguments', '')

        # Get cmssw config files and check their existence
        # Check that for dataset jobs the necessary placeholders are in the config file
        if not self._has_dataset:
            self._events_per_job = config.get('events per job', '0')
            # this can be a variable like @USER_EVENTS@!
            self._needed_vn_set.add('MAX_EVENTS')
        fragment = config.get_fn(
            'instrumentation fragment',
            get_path_share('fragmentForCMSSW.py', pkg='grid_control_cms'))
        self._config_fn_list = self._process_config_file_list(
            config,
            config.get_fn_list('config file', self._get_config_file_default()),
            fragment,
            auto_prepare=config.get_bool('instrumentation', True),
            must_prepare=self._has_dataset)

        # Create project area tarball
        if self._project_area and not os.path.exists(
                self._project_area_tarball):
            config.set_state(True, 'init', detail='sandbox')
        # Information about search order for software environment
        self._cmssw_search_dict = self._get_cmssw_path_list(config)
        if config.get_state('init', detail='sandbox'):
            msg = 'CMSSW tarball already exists! Do you want to regenerate it?'
            if os.path.exists(
                    self._project_area_tarball) and not self._uii.prompt_bool(
                        msg, True):
                # User keeps the existing tarball: stop here, which also
                # skips setting the 'storage' init state below
                return
            # Generate CMSSW tarball
            if self._project_area:
                create_tarball(_match_files(
                    self._project_area, self._project_area_matcher,
                    self._always_matcher.create_matcher(''),
                    self._project_area_base_fn),
                               name=self._project_area_tarball)
            if self._project_area_tarball_on_se:
                config.set_state(True, 'init', detail='storage')
Exemplo n.º 18
0
class CMSSW(DataTask):
	"""Data task that runs cmsRun for a CMSSW project area or SCRAM project.

	The constructor reads the SCRAM settings, copies the CMSSW config files
	into the work directory, instruments them if needed and creates the
	runtime tarball when the sandbox has to be initialised.
	"""
	getConfigSections = DataTask.createFunction_getConfigSections(['CMSSW'])

	def __init__(self, config, name):
		"""Set up the task from *config*.

		Raises ConfigError for inconsistent SCRAM settings, unreadable SCRAM
		environment files, non-CMSSW projects and missing config files.
		"""
		config.set('se input timeout', '0:30', override = False)
		config.set('dataset provider', 'DBS3Provider', override = False)
		config.set('dataset splitter', 'EventBoundarySplitter', override = False)
		DataTask.__init__(self, config, name)
		self.errorDict.update(dict(self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))))

		# SCRAM info
		scramProject = config.getList('scram project', [])
		if len(scramProject):
			self.projectArea = config.getPath('project area', '')
			if len(self.projectArea):
				raise ConfigError('Cannot specify both SCRAM project and project area')
			if len(scramProject) != 2:
				raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
		else:
			self.projectArea = config.getPath('project area')

		# This works in tandem with provider_dbsv2.py !
		self.selectedLumis = parseLumiFilter(config.get('lumi filter', ''))

		self.useReqs = config.getBool('software requirements', True, onChange = None)
		self.seRuntime = config.getBool('se runtime', False)
		self.runtimePath = config.getWorkPath('runtime.tar.gz')

		if len(self.projectArea):
			defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
			self.pattern = config.getList('area files', defaultPattern.split())

			if os.path.exists(self.projectArea):
				utils.vprint('Project area found in: %s' % self.projectArea, -1)
			else:
				raise ConfigError('Specified config area %r does not exist!' % self.projectArea)

			scramPath = os.path.join(self.projectArea, '.SCRAM')
			# try to open it
			try:
				fp = open(os.path.join(scramPath, 'Environment'), 'r')
				try:
					self.scramEnv = utils.DictFormat().parse(fp, keyParser = {None: str})
				finally:
					fp.close()
			except Exception:
				raise ConfigError('Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)

			for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
				if key not in self.scramEnv:
					raise ConfigError('Installed program in project area not recognized.')

			# visible sub-directories of .SCRAM; the first one is the default
			# architecture (a list is needed for the concatenation below)
			archs = [x for x in os.listdir(scramPath)
				if os.path.isdir(os.path.join(scramPath, x)) and not x.startswith('.')]
			self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
			try:
				fp = open(os.path.join(scramPath, self.scramArch, 'Environment'), 'r')
				try:
					self.scramEnv.update(utils.DictFormat().parse(fp, keyParser = {None: str}))
				finally:
					fp.close()
			except Exception:
				raise ConfigError('Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch)
		else:
			self.scramEnv = {
				'SCRAM_PROJECTNAME': scramProject[0],
				'SCRAM_PROJECTVERSION': scramProject[1]
			}
			self.scramArch = config.get('scram arch')

		self.scramVersion = config.get('scram version', 'scramv1')
		if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
			raise ConfigError('Project area not a valid CMSSW project area.')

		# Information about search order for software environment
		self.searchLoc = []
		if config.getState('sandbox'):
			userPath = config.get('cmssw dir', '')
			if userPath != '':
				self.searchLoc.append(('CMSSW_DIR_USER', userPath))
			if self.scramEnv.get('RELEASETOP', None):
				projPath = os.path.normpath('%s/../../../../' % self.scramEnv['RELEASETOP'])
				self.searchLoc.append(('CMSSW_DIR_PRO', projPath))
		if len(self.searchLoc):
			utils.vprint('Local jobs will try to use the CMSSW software located here:', -1)
			for i, loc in enumerate(self.searchLoc):
				key, value = loc
				utils.vprint(' %i) %s' % (i + 1, value), -1)

		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		self.configFiles = []
		cfgDefault = QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			newPath = config.getWorkPath(os.path.basename(cfgFile))
			if not os.path.exists(newPath):
				if not os.path.exists(cfgFile):
					raise ConfigError('Config file %r not found.' % cfgFile)
				shutil.copyfile(cfgFile, newPath)
			self.configFiles.append(newPath)

		# Check that for dataset jobs the necessary placeholders are in the config file
		self.prepare = config.getBool('prepare config', False)
		fragment = config.getPath('instrumentation fragment',
			os.path.join('packages', 'grid_control_cms', 'share', 'fragmentForCMSSW.py'))
		if self.dataSplitter is not None:
			if config.getState('sandbox'):
				if len(self.configFiles) > 0:
					self.instrumentCfgQueue(self.configFiles, fragment, mustPrepare = True)
		else:
			self.eventsPerJob = config.get('events per job', '0')
			if config.getState(detail = 'sandbox') and self.prepare:
				self.instrumentCfgQueue(self.configFiles, fragment)
		if not os.path.exists(config.getWorkPath('runtime.tar.gz')):
			config.setState(True, detail = 'sandbox')
		if config.getState(detail = 'sandbox'):
			if os.path.exists(config.getWorkPath('runtime.tar.gz')):
				if not utils.getUserBool('Runtime already exists! Do you want to regenerate CMSSW tarball?', True):
					# keep the existing tarball; the storage state is not touched
					return
			# Generate runtime tarball (and move to SE)
			if self.projectArea:
				utils.genTarball(config.getWorkPath('runtime.tar.gz'), utils.matchFiles(self.projectArea, self.pattern))
			if self.seRuntime:
				config.setState(True, detail = 'storage')


	def initDataProcessor(self):
		"""Return the CMS specific data split processor."""
		return CMSDataSplitProcessor(self.checkSE)


	def instrumentCfgQueue(self, cfgFiles, fragment, mustPrepare = False):
		"""Show the instrumentation status of *cfgFiles* and instrument them.

		Files are instrumented by appending the content of *fragment*. With
		``self.prepare`` set this happens without asking; otherwise the user
		is asked for every file that is not instrumented yet.

		Raises ConfigError if *mustPrepare* is set and none of the files is
		instrumented afterwards.
		"""
		def readFile(fn):
			# read the whole file and always release the handle
			fp = open(fn, 'r')
			try:
				return fp.read()
			finally:
				fp.close()
		def isInstrumented(cfgName):
			cfg = readFile(cfgName)
			for tag in self.neededVars():
				if ('__%s__' % tag not in cfg) and ('@%s@' % tag not in cfg):
					return False
			return True
		def doInstrument(cfgName):
			if not isInstrumented(cfgName) or 'customise_for_gc' not in readFile(cfgName):
				utils.vprint('Instrumenting... %s' % os.path.basename(cfgName), -1)
				fragmentContent = readFile(fragment)
				fp = open(cfgName, 'a')
				try:
					fp.write(fragmentContent)
				finally:
					fp.close()
			else:
				utils.vprint('%s already contains customise_for_gc and all needed variables' % os.path.basename(cfgName), -1)

		cfgStatus = []
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))
		for cfg in cfgFiles:
			cfgStatus.append({0: cfg.split(comPath, 1)[1].lstrip('/'), 1: str(isInstrumented(cfg)), 2: cfg})
		utils.printTabular([(0, 'Config file'), (1, 'Instrumented')], cfgStatus, 'lc')

		for cfg in cfgFiles:
			if self.prepare or not isInstrumented(cfg):
				if self.prepare or utils.getUserBool('Do you want to prepare %s for running over the dataset?' % cfg, True):
					doInstrument(cfg)
		if mustPrepare and not (True in map(isInstrumented, cfgFiles)):
			raise ConfigError('A config file must use %s to work properly!' %
				str.join(', ', map(lambda x: '@%s@' % x, self.neededVars())))


	# Lumi filter need
	def neededVars(self):
		"""Return the variable names the config files have to reference."""
		result = []
		varMap = {
			DataSplitter.NEntries: 'MAX_EVENTS',
			DataSplitter.Skipped: 'SKIP_EVENTS',
			DataSplitter.FileList: 'FILE_NAMES'
		}
		if self.dataSplitter:
			result.extend(map(lambda x: varMap[x], self.dataSplitter.neededVars()))
		if self.selectedLumis:
			result.append('LUMI_RANGE')
		return result


	# Called on job submission
	def getSubmitInfo(self, jobNum):
		"""Return the base submit info extended with CMSSW details."""
		result = DataTask.getSubmitInfo(self, jobNum)
		result.update({'application': self.scramEnv['SCRAM_PROJECTVERSION'], 'exe': 'cmsRun'})
		if self.dataSplitter is None:
			result.update({'nevtJob': self.eventsPerJob})
		return result


	# Get environment variables for gc_config.sh
	def getTaskConfig(self):
		"""Return the base task config extended with the CMSSW settings."""
		data = DataTask.getTaskConfig(self)
		data.update(dict(self.searchLoc))
		data['CMSSW_OLD_RELEASETOP'] = self.scramEnv.get('RELEASETOP', None)
		data['DB_EXEC'] = 'cmsRun'
		data['SCRAM_ARCH'] = self.scramArch
		data['SCRAM_VERSION'] = self.scramVersion
		data['SCRAM_PROJECTVERSION'] = self.scramEnv['SCRAM_PROJECTVERSION']
		data['GZIP_OUT'] = QM(self.gzipOut, 'yes', 'no')
		data['SE_RUNTIME'] = QM(self.seRuntime, 'yes', 'no')
		data['HAS_RUNTIME'] = QM(len(self.projectArea), 'yes', 'no')
		data['CMSSW_CONFIG'] = str.join(' ', map(os.path.basename, self.configFiles))
		if self.prolog.isActive():
			data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
			data['CMSSW_PROLOG_SB_In_FILES'] = str.join(' ', self.prolog.getSBInFiles())
			data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
		if self.epilog.isActive():
			data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
			data['CMSSW_EPILOG_SB_In_FILES'] = str.join(' ', self.epilog.getSBInFiles())
			data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
		return data


	# Get job requirements
	def getRequirements(self, jobNum):
		"""Return the base requirements plus software tags if enabled."""
		reqs = DataTask.getRequirements(self, jobNum)
		if self.useReqs:
			reqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self.scramEnv['SCRAM_PROJECTVERSION']))
			reqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self.scramArch))
		return reqs


	# Get files to be transferred via SE (description, source, target)
	def getSEInFiles(self):
		"""Return the base SE input files, plus the runtime when SE-hosted."""
		files = DataTask.getSEInFiles(self)
		if len(self.projectArea) and self.seRuntime:
			return files + [('CMSSW runtime', self.runtimePath, self.taskID + '.tar.gz')]
		return files


	# Get files for input sandbox
	def getSBInFiles(self):
		"""Return all files of the input sandbox."""
		files = DataTask.getSBInFiles(self) + self.configFiles + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
		# the runtime travels in the sandbox only if it is not sent via SE
		if len(self.projectArea) and not self.seRuntime:
			files.append(self.runtimePath)
		return files + [utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms')]


	# Get files for output sandbox
	def getSBOutFiles(self):
		"""Return the base output files plus the CMSSW specific ones."""
		return DataTask.getSBOutFiles(self) + QM(self.gzipOut, ['cmssw.log.gz'], []) + ['cmssw.dbs.tar.gz']


	def getCommand(self):
		"""Return the command line invoking the gc-run.cmssw.sh script."""
		return './gc-run.cmssw.sh $@'


	def getJobArguments(self, jobNum):
		"""Return the base job arguments followed by ``self.arguments``."""
		return DataTask.getJobArguments(self, jobNum) + ' ' + self.arguments


	def getActiveLumiFilter(self, lumifilter, jobNum = None):
		"""Return *lumifilter* as comma separated, double-quoted entries.

		*jobNum* is accepted but currently not used.
		"""
		getLR = lambda x: str.join(',', map(lambda x: '"%s"' % x, formatLumi(x)))
		# TODO: Validate subset selection - the filter could be narrowed via
		# filterLumiFilter to the runs listed in the split info of jobNum
		return getLR(lumifilter)


	def getVarNames(self):
		"""Return the base variable names plus the CMSSW specific ones."""
		result = DataTask.getVarNames(self)
		if self.dataSplitter is None:
			result.append('MAX_EVENTS')
		if self.selectedLumis:
			result.append('LUMI_RANGE')
		return result


	# Get job dependent environment variables
	def getJobConfig(self, jobNum):
		"""Return the job dependent environment variables for *jobNum*."""
		data = DataTask.getJobConfig(self, jobNum)
		if self.dataSplitter is None:
			data['MAX_EVENTS'] = self.eventsPerJob
		if self.selectedLumis:
			data['LUMI_RANGE'] = self.getActiveLumiFilter(self.selectedLumis)
		return data


	def getDescription(self, jobNum): # (task name, job name, type)
		"""Return the base description, defaulting the job type to 'analysis'."""
		(taskName, jobName, jobType) = DataTask.getDescription(self, jobNum)
		return (taskName, jobName, QM(jobType, jobType, 'analysis'))


	def getDependencies(self):
		"""Return the base dependencies extended by 'cmssw'."""
		return DataTask.getDependencies(self) + ['cmssw']
Exemplo n.º 19
0
	def __init__(self, config, name):
		"""Initialise the base data task, then wrap the configured executable."""
		DataTask.__init__(self, config, name)
		exe_wrapper = TaskExecutableWrapper(config)
		self._exe = exe_wrapper
Exemplo n.º 20
0
class CMSSW(SCRAMTask):
	configSections = SCRAMTask.configSections + ['CMSSW']

	def __init__(self, config, name):
		"""Set up the CMSSW task.

		Writes a number of dataset / dashboard related config options, initialises
		the SCRAM base task, wraps the prolog and epilog executables, stages the
		CMSSW config files in the work directory (see _processConfigFiles) and
		generates the project area tarball when the 'sandbox' init state is set.

		Raises ConfigError if the project area is no CMSSW project or if the
		unprefixed 'executable' option is used.
		"""
		# Options written to the config before the base class reads it
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		# NOTE(review): '+=' presumably appends to an existing value instead of replacing it - confirm
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor ' +
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor')
		# The application name is set through a separate view on the 'dashboard' section
		dash_config = config.changeView(viewClass = 'SimpleConfigView', setSections = ['dashboard'])
		dash_config.set('application', 'cmsRun')
		SCRAMTask.__init__(self, config, name)
		if self._scramProject != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		# RELEASETOP entry of the project area's SCRAM environment file (None without project area / entry)
		self._oldReleaseTop = None
		if self._projectArea:
			self._oldReleaseTop = self._parse_scram_file(os.path.join(self._projectArea, '.SCRAM', self._scramArch, 'Environment')).get('RELEASETOP', None)

		self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))

		# True: the project area tarball is listed in getSEInFiles, False: in getSBInFiles
		self._projectAreaTarballSE = config.getBool(['se runtime', 'se project area'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - the old, unprefixed 'executable' option is rejected
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		# (eventsPerJob is only defined for tasks without data splitter)
		if self._dataSplitter is None:
			self.eventsPerJob = config.get('events per job', '0') # this can be a variable like @USER_EVENTS@!
		fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
			autoPrepare = config.getBool('instrumentation', True),
			mustPrepare = (self._dataSplitter is not None))

		# Request the sandbox initialisation if the project area tarball does not exist yet
		if self._projectArea and not os.path.exists(self._projectAreaTarball):
			config.setState(True, 'init', detail = 'sandbox')
		# Information about search order for software environment
		self.searchLoc = self._getCMSSWPaths(config)
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					# NOTE: leaves __init__ here - the 'storage' init state below is not set in this case
					return
			# Generate CMSSW tarball
			if self._projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self._projectArea, self._projectAreaPattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')


	def _getCMSSWPaths(self, config):
		"""Return the CMSSW software locations as a list of (variable name, path) tuples.

		The user supplied directory ('cmssw dir' / 'vo software dir' option) comes
		first, followed by the location derived from the old release top of the
		project area. The list is also written to the 'user' logger.
		"""
		searchPaths = []
		userDir = config.get(['cmssw dir', 'vo software dir'], '')
		if userDir:
			# Use the absolute local path if the directory exists on this machine
			localDir = os.path.abspath(utils.cleanPath(userDir))
			if os.path.exists(localDir):
				userDir = localDir
			searchPaths.append(('CMSSW_DIR_USER', userDir))
		if self._oldReleaseTop:
			searchPaths.append(('CMSSW_DIR_PRO', os.path.normpath('%s/../../../../' % self._oldReleaseTop)))
		userLog = logging.getLogger('user')
		userLog.info('Local jobs will try to use the CMSSW software located here:')
		for (idx, (_, location)) in enumerate(searchPaths):
			userLog.info(' %i) %s', idx + 1, location)
		if searchPaths:
			userLog.info('')
		return searchPaths


	def _getConfigFiles(self, config):
		"""Yield the paths given by the 'config file' option.

		Raises ConfigError for the first path that does not exist.
		"""
		# With an active prolog or epilog the option defaults to an empty list
		# (noDefault presumably makes the option mandatory otherwise - TODO confirm)
		if self.prolog.isActive() or self.epilog.isActive():
			cfgDefault = []
		else:
			cfgDefault = noDefault
		for cfgPath in config.getPaths('config file', cfgDefault, mustExist = False):
			if os.path.exists(cfgPath):
				yield cfgPath
			else:
				raise ConfigError('Config file %r not found.' % cfgPath)


	def _cfgIsInstrumented(self, fn):
		fp = open(fn, 'r')
		try:
			cfg = fp.read()
		finally:
			fp.close()
		for tag in self.neededVars():
			if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
				return False
		return True


	def _cfgStore(self, source, target, fragment_path = None):
		fp = open(source, 'r')
		try:
			content = fp.read()
		finally:
			fp.close()
		fp = open(target, 'w')
		try:
			fp.write(content)
			if fragment_path:
				logging.getLogger('user').info('Instrumenting... %s', os.path.basename(source))
				fragment_fp = open(fragment_path, 'r')
				fp.write(fragment_fp.read())
				fragment_fp.close()
		finally:
			fp.close()


	def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare, mustPrepare):
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))

		cfgTodo = []
		cfgStatus = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			if cfg_new_exists:
				isInstrumented = self._cfgIsInstrumented(cfg_new)
				doCopy = False
			else:
				isInstrumented = self._cfgIsInstrumented(cfg)
				doCopy = True
			doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
			doCopy = doCopy or doPrepare
			if doCopy:
				cfgTodo.append((cfg, cfg_new, doPrepare))
			cfgStatus.append({1: cfg.split(comPath, 1)[1].lstrip('/'), 2: cfg_new_exists,
				3: isInstrumented, 4: doPrepare})

		if cfgStatus:
			utils.printTabular([(1, 'Config file'), (2, 'Work dir'), (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus, 'lccc')
		return cfgTodo


	def _processConfigFiles(self, config, cfgFiles, fragment_path, autoPrepare, mustPrepare):
		"""Stage the config files in the work directory and return the staged paths.

		Files reported by _cfgFindUninitialized are written to the work directory;
		the fragment is appended if the file is scheduled for instrumentation and
		either autoPrepare is set or the user agrees.

		Raises ConfigError if a file is missing in the work directory afterwards or
		if mustPrepare is set and a staged file is not instrumented.
		"""
		# Write all missing / uninstrumented files
		for (cfgSource, cfgTarget, needsPrepare) in self._cfgFindUninitialized(config, cfgFiles, autoPrepare, mustPrepare):
			usedFragment = None
			if needsPrepare and (autoPrepare or utils.getUserBool('Do you want to prepare %s for running over the dataset?' % cfgSource, True)):
				usedFragment = fragment_path
			self._cfgStore(cfgSource, cfgTarget, usedFragment)

		# Verify the state of the work directory copies
		stagedFiles = []
		for cfgSource in cfgFiles:
			cfgTarget = config.getWorkPath(os.path.basename(cfgSource))
			if not os.path.exists(cfgTarget):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfgSource)
			if not self._cfgIsInstrumented(cfgTarget):
				if mustPrepare:
					raise ConfigError('Config file %r must use %s to work properly!' %
						(cfgSource, str.join(', ', ['@%s@' % varName for varName in self.neededVars()])))
				if autoPrepare:
					self._log.warning('Config file %r was not instrumented!', cfgSource)
			stagedFiles.append(cfgTarget)
		return stagedFiles


	def neededVars(self):
		if self._dataSplitter:
			return self._partProcessor.getNeededKeys(self._dataSplitter) or []
		return ['MAX_EVENTS']


	# Get environment variables for gc_config.sh
	def getTaskConfig(self):
		"""Return the task wide variables: the dictionary of the base task,
		extended by the software search locations, the runtime / gzip flags,
		the CMSSW executable and config files and - for an active prolog or
		epilog - the respective command, input files and arguments."""
		def yesNo(flag):
			if flag:
				return 'yes'
			return 'no'

		data = SCRAMTask.getTaskConfig(self)
		for (locName, locPath) in self.searchLoc:
			data[locName] = locPath
		data['GZIP_OUT'] = yesNo(self.gzipOut)
		data['SE_RUNTIME'] = yesNo(self._projectAreaTarballSE)
		data['HAS_RUNTIME'] = yesNo(self._projectArea)
		data['CMSSW_EXEC'] = 'cmsRun'
		cfgNames = [os.path.basename(cfgFile) for cfgFile in self.configFiles]
		data['CMSSW_CONFIG'] = str.join(' ', cfgNames)
		data['CMSSW_OLD_RELEASETOP'] = self._oldReleaseTop
		if self.prolog.isActive():
			data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
			prologFiles = [fileInfo.pathRel for fileInfo in self.prolog.getSBInFiles()]
			data['CMSSW_PROLOG_SB_IN_FILES'] = str.join(' ', prologFiles)
			data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
		if self.epilog.isActive():
			data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
			epilogFiles = [fileInfo.pathRel for fileInfo in self.epilog.getSBInFiles()]
			data['CMSSW_EPILOG_SB_IN_FILES'] = str.join(' ', epilogFiles)
			data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
		return data


	# Get files to be transferred via SE (description, source, target)
	def getSEInFiles(self):
		"""Return the SE input files of the base task; the project area
		tarball is added if there is a project area and the tarball is
		configured to be transferred via SE."""
		seFiles = SCRAMTask.getSEInFiles(self)
		if not (self._projectArea and self._projectAreaTarballSE):
			return seFiles
		tarballEntry = ('CMSSW tarball', self._projectAreaTarball, self.taskID + '.tar.gz')
		return seFiles + [tarballEntry]


	# Get files for input sandbox
	def getSBInFiles(self):
		"""Return the input sandbox files: the files of the base task, prolog
		and epilog, the staged config files, the project area tarball (if it
		is not transferred via SE) and the CMSSW run script."""
		files = SCRAMTask.getSBInFiles(self) + self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
		files.extend([utils.Result(pathAbs = cfgFile, pathRel = os.path.basename(cfgFile))
			for cfgFile in self.configFiles])
		if self._projectArea and not self._projectAreaTarballSE:
			tarballName = os.path.basename(self._projectAreaTarball)
			files.append(utils.Result(pathAbs = self._projectAreaTarball, pathRel = tarballName))
		runScript = utils.Result(pathAbs = utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'), pathRel = 'gc-run.cmssw.sh')
		return files + [runScript]


	# Get files for output sandbox
	def getSBOutFiles(self):
		"""Return the output sandbox file names of the base task. With config
		files, 'cmssw.log.gz' (only if gzipOut is set) and 'cmssw.dbs.tar.gz'
		are added."""
		outFiles = SCRAMTask.getSBOutFiles(self)
		if self.configFiles:
			if self.gzipOut:
				outFiles = outFiles + ['cmssw.log.gz']
			outFiles = outFiles + ['cmssw.dbs.tar.gz']
		return outFiles


	def getCommand(self):
		"""Return the shell command line of the job: the CMSSW run script,
		called with all arguments given to the job ($@)."""
		runScript = './gc-run.cmssw.sh'
		return runScript + ' $@'


	def getJobArguments(self, jobNum):
		"""Return the job arguments of the base task followed by the
		arguments stored in self.arguments (separated by one space)."""
		baseArguments = SCRAMTask.getJobArguments(self, jobNum)
		return baseArguments + ' ' + self.arguments


	def getVarNames(self):
		"""Return the variable names of the base task; 'MAX_EVENTS' is
		appended for tasks without data splitter."""
		varNames = SCRAMTask.getVarNames(self)
		if self._dataSplitter is not None:
			return varNames
		varNames.append('MAX_EVENTS')
		return varNames


	# Get job dependent environment variables
	def getJobConfig(self, jobNum):
		"""Return the job dependent variables of the base task; 'MAX_EVENTS'
		is set to eventsPerJob for tasks without data splitter."""
		jobConfig = SCRAMTask.getJobConfig(self, jobNum)
		if self._dataSplitter is not None:
			return jobConfig
		jobConfig['MAX_EVENTS'] = self.eventsPerJob
		return jobConfig


	def getDescription(self, jobNum): # (task name, job name, type)
		"""Return the description object of the base task; an empty job
		type is replaced by 'analysis'."""
		description = SCRAMTask.getDescription(self, jobNum)
		if description.jobType:
			return description
		description.jobType = 'analysis'
		return description