Exemplo n.º 1
0
    def __init__(self, config, datasetExpr, datasetNick=None, datasetID=0):
        """Read the provider settings and split the dataset expression.

        The 'lumi filter' option is parsed first; when it yields a non-empty
        filter, 'LumiDataProcessor' is appended to the 'dataset processor'
        option before the base class is initialised.

        datasetExpr is split on '@' and '#' into dataset path, URL and block.
        An empty URL falls back to the 'dbs instance' option and an empty
        block falls back to 'all'.
        """
        lumi_filter = parseLumiFilter(config.get('lumi filter', ''))
        self._lumi_filter = lumi_filter
        if lumi_filter:
            config.set('dataset processor', 'LumiDataProcessor', '+=')
        DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)

        # Lumi metadata is requested by default whenever a lumi filter is set
        self._lumi_query = config.getBool('lumi metadata', self._lumi_filter != [])

        # PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1's dont!
        filter_opts = {'defaultMatcher': 'blackwhite', 'defaultFilter': 'weak'}
        self._phedexFilter = config.getFilter(
            'phedex sites', '-T3_US_FNALLPC', **filter_opts)
        self._phedexT1Filter = config.getFilter(
            'phedex t1 accept', 'T1_DE_KIT T1_US_FNAL', **filter_opts)
        t1_mode = config.get('phedex t1 mode', 'disk')
        self._phedexT1Mode = t1_mode.lower()
        self.onlyComplete = config.getBool('only complete sites', True)
        self._locationFormat = config.getEnum(
            'location format', CMSLocationFormat, CMSLocationFormat.hostname)

        path, url, block = utils.optSplit(datasetExpr, '@#')
        self._datasetPath = path
        # The 'dbs instance' option is only consulted when the expression has no URL
        self._url = url if url else config.get('dbs instance', '')
        self._datasetBlock = block if block else 'all'
        self.onlyValid = config.getBool('only valid', True)
Exemplo n.º 2
0
    def __init__(self, config, name):
        """Translate nickname-based options into regular settings, then
        initialise the CMSSW base class.

        Three nickname mappings are read from config:
          * 'nickname config'      -> self.nmCfg   (nickname -> config files)
          * 'nickname constants'   -> self.nmConst (nickname -> {name: value})
          * 'nickname lumi filter' -> self.nmLumi  (nickname -> lumi filter)

        The mappings are printed as a table before CMSSW.__init__ is called.

        Raises ConfigError when 'config file' is set together with
        'nickname config', or when 'lumi filter' is set at all.
        """
        head = [(0, "Nickname")]

        # Mapping between nickname and config files:
        cfgList = config.get("nickname config", "")
        self.nmCfg = config.getDict(
            "nickname config",
            {},
            # Build a real list so the parsed value can be iterated repeatedly
            parser=lambda x: [entry.strip() for entry in x.split(",")],
            str=lambda x: str.join(",", x),
        )[0]
        if cfgList:
            if "config file" in config.getOptions():
                raise ConfigError("Please use 'nickname config' instead of 'config file'")
            allConfigFiles = utils.flatten(self.nmCfg.values())
            config.set("config file", str.join("\n", allConfigFiles))
            head.append((1, "Config file"))

        # Mapping between nickname and constants:
        # (stored as a list - it is iterated here and again by other methods)
        self.nmCName = [entry.strip() for entry in config.get("nickname constants", "").split()]
        self.nmConst = {}
        for var in self.nmCName:
            tmp = config.getDict(var, {})[0]
            for (nick, value) in tmp.items():
                # Falsy values are normalised to the empty string
                self.nmConst.setdefault(nick, {})[var] = value or ""
            head.append((var, var))

        # Mapping between nickname and lumi filter:
        if "lumi filter" in config.getOptions():
            raise ConfigError("Please use 'nickname lumi filter' instead of 'lumi filter'")

        def lumiParse(x):
            return formatLumi(parseLumiFilter(x))

        self.nmLumi = config.getDict("nickname lumi filter", {}, parser=lumiParse)[0]
        if self.nmLumi:
            for dataset in config.get("dataset", "").splitlines():
                # Only the nickname is needed from the parsed dataset expression
                (datasetNick, _provider, _expr) = DataProvider.parseDatasetExpr(config, dataset, None)
                config.set(
                    "dataset %s" % datasetNick,
                    "lumi filter",
                    str.join(",", utils.flatten(fromNM(self.nmLumi, datasetNick, []))),
                )
            config.set("lumi filter", str.join(",", self.nmLumi.get(None, [])))
            head.append((2, "Lumi filter"))

        utils.vprint("Mapping between nickname and other settings:\n", -1)

        def report():
            # One table row per nickname that appears in any of the three mappings
            nicks = set(self.nmCfg) | set(self.nmConst) | set(self.nmLumi)
            for nick in sorted(nicks):
                tmp = {
                    0: nick,
                    1: str.join(", ", map(os.path.basename, self.nmCfg.get(nick, ""))),
                    2: self.displayLumi(self.nmLumi.get(nick, "")),
                }
                yield utils.mergeDicts([tmp, self.nmConst.get(nick, {})])

        utils.printTabular(head, report(), "cl")
        utils.vprint(level=-1)
        CMSSW.__init__(self, config, name)
Exemplo n.º 3
0
 def getVarsForNick(self, nick):
     """Return the job variables that apply to the given nickname.

     The result always holds "CMSSW_CONFIG" (space separated base names of
     the config files found for nick) and one entry per name in
     self.nmCName, defaulting to "". "LUMI_RANGE" is added only when a lumi
     filter is found for nick.
     """
     cfgNames = map(os.path.basename, utils.flatten(fromNM(self.nmCfg, nick, "")))
     result = {"CMSSW_CONFIG": str.join(" ", cfgNames)}

     # Entries stored under the None key are merged first, then those for nick
     merged = utils.mergeDicts(fromNM(self.nmConst, None, {}) + fromNM(self.nmConst, nick, {}))
     for constName in self.nmCName:
         result[constName] = merged.get(constName, "")

     selection = utils.flatten(fromNM(self.nmLumi, nick, ""))
     if selection:
         result["LUMI_RANGE"] = parseLumiFilter(str.join(",", selection))
     return result
Exemplo n.º 4
0
def lumi_expr(opts, args):
	"""Parse a lumi filter expression and emit it in the requested formats.

	args is joined with spaces and handed to parseLumiFilter. Depending on
	the flags in opts the result is passed to outputGC (opts.gc), to
	outputJSON (opts.json) and / or expanded into a dictionary that is
	printed (opts.full).

	Raises Exception when args is empty or the expression cannot be parsed.
	"""
	if not len(args):
		raise Exception('No arguments given!')
	expr = str.join(' ', args)
	try:
		lumis = parseLumiFilter(expr)
	except Exception:
		raise Exception('Could not parse: %s' % expr)

	if opts.gc:
		outputGC(lumis)
	if opts.json:
		outputJSON(lumis)
	if opts.full:
		expanded = {}
		for (first, last) in lumis:
			# Each range has to start and end with the same leading entry
			assert(first[0] == last[0])
			sections = expanded.setdefault(first[0], [])
			sections.extend(irange(first[1], last[1] + 1))
		print(expanded)
Exemplo n.º 5
0
	def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
		"""Read the provider settings and split the dataset expression.

		A non-empty 'lumi filter' appends 'LumiDataProcessor' to the
		'dataset processor' option before the base class is initialised.
		datasetExpr is split on '@' and '#' into dataset path, URL and block;
		an empty URL is taken from 'dbs instance', an empty block becomes 'all'.
		"""
		lumiExpr = config.get('lumi filter', '')
		self._lumi_filter = parseLumiFilter(lumiExpr)
		if self._lumi_filter:
			config.set('dataset processor', 'LumiDataProcessor', '+=')
		DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)

		# Lumi metadata is queried by default as soon as a lumi filter is active
		lumiQueryDefault = (self._lumi_filter != [])
		self._lumi_query = config.getBool('lumi metadata', lumiQueryDefault)

		# PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1's dont!
		self._phedexFilter = config.getFilter(
			'phedex sites', '-T3_US_FNALLPC',
			defaultMatcher = 'blackwhite', defaultFilter = 'weak')
		self._phedexT1Filter = config.getFilter(
			'phedex t1 accept', 'T1_DE_KIT T1_US_FNAL',
			defaultMatcher = 'blackwhite', defaultFilter = 'weak')
		self._phedexT1Mode = config.get('phedex t1 mode', 'disk').lower()
		self.onlyComplete = config.getBool('only complete sites', True)
		self._locationFormat = config.getEnum(
			'location format', CMSLocationFormat, CMSLocationFormat.hostname)

		exprParts = utils.optSplit(datasetExpr, '@#')
		(self._datasetPath, self._url, self._datasetBlock) = exprParts
		if not self._url:
			# No URL in the dataset expression - use the configured instance
			self._url = config.get('dbs instance', '')
		if not self._datasetBlock:
			self._datasetBlock = 'all'
		self.onlyValid = config.getBool('only valid', True)
Exemplo n.º 6
0
def lumi_expr(opts, args):
    """Parse the lumi filter given on the command line and output it.

    The space-joined args are parsed by parseLumiFilter. opts.gc and
    opts.json forward the parsed filter to outputGC / outputJSON; opts.full
    prints a dictionary that lists every value of each parsed range.

    Raises Exception for an empty args list or an unparsable expression.
    """
    if len(args) < 1:
        raise Exception('No arguments given!')
    joined = str.join(' ', args)
    try:
        lumis = parseLumiFilter(joined)
    except Exception:
        raise Exception('Could not parse: %s' % joined)

    if opts.gc:
        outputGC(lumis)
    if opts.json:
        outputJSON(lumis)
    if not opts.full:
        return

    per_run = {}
    for (start, end) in lumis:
        # A range is only expanded when both ends share the first entry
        assert (start[0] == end[0])
        per_run.setdefault(start[0], []).extend(
            irange(start[1], end[1] + 1))
    print(per_run)
Exemplo n.º 7
0
	def __init__(self, config, datasetExpr, datasetNick, datasetID = 0):
		"""Read the provider settings, split the dataset expression and
		announce an active lumi filter.

		Raises ConfigError when 'location format' is not one of
		'hostname', 'sitedb' or 'both'.
		"""
		DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
		# PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1's dont!
		self.phedexBL = config.getList('phedex sites', ['-T3_US_FNALLPC'])
		self.phedexWL = config.getList('phedex t1 accept', ['T1_DE_KIT', 'T1_US_FNAL'])
		self.phedexT1 = config.get('phedex t1 mode', 'disk').lower()
		self.onlyComplete = config.getBool('only complete sites', True)
		# The value is stored before it is validated
		self.locationFormat = config.get('location format', 'hostname').lower()
		if self.locationFormat not in ('hostname', 'sitedb', 'both'):
			raise ConfigError('Invalid location format: %s' % self.locationFormat)

		(path, url, block) = utils.optSplit(datasetExpr, '@#')
		(self.datasetPath, self.url, self.datasetBlock) = (path, url, block)
		# 'dbs instance' is read unconditionally, even if the expression has a URL
		dbsInstance = config.get('dbs instance', '')
		self.url = utils.QM(self.url, self.url, dbsInstance)
		self.datasetBlock = utils.QM(self.datasetBlock, self.datasetBlock, 'all')
		self.includeLumi = config.getBool('keep lumi metadata', False)
		self.onlyValid = config.getBool('only valid', True)
		self.checkUnique = config.getBool('check unique', True)

		# This works in tandem with active task module (cmssy.py supports only [section] lumi filter!)
		self.selectedLumis = parseLumiFilter(config.get('lumi filter', ''))
		if not self.selectedLumis:
			return
		nEntries = len(self.selectedLumis)
		utils.vprint('Runs/lumi section filter enabled! (%d entries)' % nEntries, -1, once = True)
		utils.vprint('\tThe following runs and lumi sections are selected:', 1, once = True)
		lumiText = utils.wrapList(formatLumi(self.selectedLumis), 65, ',\n\t')
		utils.vprint('\t' + lumiText, 1, once = True)
Exemplo n.º 8
0
def main():
	"""Run the actions selected on the command line.

	Two independent sections are handled, driven by the module level
	opts / args:

	* Job output evaluation (save_jobjson, save_jobgc, get_events):
	  reads the framework reports of the jobs, collects the run / lumi
	  sections and the event counts per sample and prints or saves them.
	* Lumi filter manipulation (save_exprgc, save_exprjson, save_exprfull):
	  parses the lumi filter given in args and outputs it.

	Raises Exception when the lumi filter section is requested without
	arguments or with an expression that cannot be parsed. Errors while
	reading a framework report propagate to the caller.
	"""
	###########################
	# Job output evaluation
	###########################
	if opts.save_jobjson or opts.save_jobgc or opts.get_events:
		(workDir, nJobs, jobList) = getWorkJobs(args)
		(log, incomplete, splitter, splitInfo) = (None, False, None, {})
		(lumiDict, readDict, writeDict) = ({}, {}, {})
		try:
			splitter = DataSplitter.loadState(os.path.join(workDir, 'datamap.tar'))
		except Exception:
			# Best effort: without a splitter state the default output name is used
			pass
		jobList = sorted(jobList)

		for jobNum in jobList:
			# Drop the previous activity message before creating the next one
			del log
			log = utils.ActivityLog('Reading job logs - [%d / %d]' % (jobNum, jobList[-1]))
			jobInfo = getJobInfo(workDir, jobNum, lambda retCode: retCode == 0)
			if not jobInfo:
				if not incomplete:
					# Only warn once about unfinished jobs
					print('WARNING: Not all jobs have finished - results will be incomplete!')
					incomplete = True
				continue

			if not parameterized:
				if splitter:
					splitInfo = splitter.getSplitInfo(jobNum)
				outputName = splitInfo.get(DataSplitter.Nickname, splitInfo.get(DataSplitter.DatasetID, 0))
			else:
				outputName = jobInfo['file'].split()[2].replace("_%d_" % jobNum, '_').replace('/', '_').replace('__', '_')

			# Read framework report files to get number of events
			try:
				outputDir = os.path.join(workDir, 'output', 'job_' + str(jobNum))
				for fwkXML in getCMSSWInfo(os.path.join(outputDir, 'cmssw.dbs.tar.gz')):
					for run in fwkXML.getElementsByTagName('Run'):
						for lumi in run.getElementsByTagName('LumiSection'):
							run_id = int(run.getAttribute('ID'))
							lumi_id = int(lumi.getAttribute('ID'))
							lumiDict.setdefault(outputName, {}).setdefault(run_id, set()).add(lumi_id)
					for outFile in fwkXML.getElementsByTagName('File'):
						pfn = outFile.getElementsByTagName('PFN')[0].childNodes[0].data
						written = int(outFile.getElementsByTagName('TotalEvents')[0].childNodes[0].data)
						sampleFiles = writeDict.setdefault(outputName, {})
						sampleFiles[pfn] = sampleFiles.get(pfn, 0) + written
					for inFile in fwkXML.getElementsByTagName('InputFile'):
						eventsRead = int(inFile.getElementsByTagName('EventsRead')[0].childNodes[0].data)
						readDict[outputName] = readDict.get(outputName, 0) + eventsRead
			except KeyboardInterrupt:
				sys.exit(os.EX_OK)
			# NOTE: any other error while parsing the framework output of a job
			# is deliberately left to propagate (no warn-and-continue).

		del log
		log = utils.ActivityLog('Simplifying lumi sections')
		lumis = {}
		for sample in lumiDict:
			for run in lumiDict[sample]:
				for lumi in lumiDict[sample][run]:
					lumis.setdefault(sample, []).append(([run, lumi], [run, lumi]))
		for sample in lumiDict:
			lumis[sample] = mergeLumi(lumis[sample])
		del log

		for sample, sampleLumis in lumis.items():
			print('Sample: %s' % sample)
			print('=========================================')
			# A sample may have lumi information without any input file records
			print('Number of events processed: %12d' % readDict.get(sample, 0))
			print('  Number of events written: %12d' % sum(writeDict.get(sample, {}).values()))
			if writeDict.get(sample, None):
				print('')
				head = [(0, '          Output filename'), (1, 'Events')]
				rows = [{0: pfn, 1: writeDict[sample][pfn]} for pfn in writeDict[sample]]
				utils.printTabular(head, rows)
			if opts.save_jobjson:
				jsonPath = os.path.join(workDir, 'processed_%s.json' % sample)
				fp = open(jsonPath, 'w')
				try:
					outputJSON(sampleLumis, fp)
				finally:
					# Make sure the JSON file is flushed and closed
					fp.close()
				print('Saved processed lumi sections in %s' % jsonPath)
			if opts.save_jobgc:
				print('')
				print('List of processed lumisections:')
				print('-----------------------------------------')
				outputGC(sampleLumis)
			print('')


	###########################
	# Lumi filter manipulation
	###########################
	if opts.save_exprgc or opts.save_exprjson or opts.save_exprfull:
		if len(args) == 0:
			raise Exception('No arguments given!')
		try:
			lumis = parseLumiFilter(str.join(' ', args))
		except Exception:
			raise Exception('Could not parse: %s' % str.join(' ', args))

		if opts.save_exprgc:
			outputGC(lumis)
		if opts.save_exprjson:
			outputJSON(lumis)
		if opts.save_exprfull:
			result = {}
			for rlrange in lumis:
				start, end = rlrange
				# Each range has to start and end with the same leading entry
				assert(start[0] == end[0])
				result.setdefault(start[0], []).extend(range(start[1], end[1] + 1))
			print(result)
Exemplo n.º 9
0
	def __init__(self, config, name):
		"""Configure the CMSSW task.

		Sets the dataset related defaults, initialises the DataTask base
		class, determines the SCRAM project (either from 'scram project' or
		from the '.SCRAM' directory of 'project area'), collects the CMSSW
		config files and - when the sandbox has to be initialised - creates
		the project area tarball.

		Raises ConfigError for inconsistent or invalid SCRAM / project area
		settings, unreadable SCRAM environment files, use of the plain
		'executable' option and missing config files.
		"""
		config.set('se input timeout', '0:30')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'CMSDataSplitProcessor SECheckSplitProcessor')
		DataTask.__init__(self, config, name)
		self.errorDict.update(dict(self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))))

		def parseEnvFile(path):
			# Parse a SCRAM 'Environment' file and always release the file handle
			fp = open(path, 'r')
			try:
				return utils.DictFormat().parse(fp, keyParser = {None: str})
			finally:
				fp.close()

		# SCRAM info
		scramProject = config.getList('scram project', [])
		if scramProject:
			self.projectArea = config.getPath('project area', '')
			if self.projectArea:
				raise ConfigError('Cannot specify both SCRAM project and project area')
			if len(scramProject) != 2:
				raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
		else:
			self.projectArea = config.getPath('project area')

		# This works in tandem with provider_dbsv2.py !
		self.selectedLumis = parseLumiFilter(config.get('lumi filter', ''))

		self.useReqs = config.getBool('software requirements', True, onChange = None)
		self._projectAreaTarballSE = config.getBool(['se project area', 'se runtime'], True)
		self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

		if self.projectArea:
			defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
			self.pattern = config.getList('area files', defaultPattern.split())

			if os.path.exists(self.projectArea):
				utils.vprint('Project area found in: %s' % self.projectArea, -1)
			else:
				raise ConfigError('Specified config area %r does not exist!' % self.projectArea)

			scramPath = os.path.join(self.projectArea, '.SCRAM')
			# try to open it
			try:
				self.scramEnv = parseEnvFile(os.path.join(scramPath, 'Environment'))
			except Exception:
				raise ConfigError('Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)

			for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
				if key not in self.scramEnv:
					raise ConfigError('Installed program in project area not recognized.')

			# Visible sub-directories of .SCRAM are the architecture candidates;
			# the first one found is the default for 'scram arch' (if there is none,
			# the option has no default)
			archs = [entry for entry in os.listdir(scramPath)
				if os.path.isdir(os.path.join(scramPath, entry)) and not entry.startswith('.')]
			self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
			try:
				self.scramEnv.update(parseEnvFile(os.path.join(scramPath, self.scramArch, 'Environment')))
			except Exception:
				raise ConfigError('Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch)
		else:
			self.scramEnv = {
				'SCRAM_PROJECTNAME': scramProject[0],
				'SCRAM_PROJECTVERSION': scramProject[1]
			}
			self.scramArch = config.get('scram arch')

		self.scramVersion = config.get('scram version', 'scramv1')
		if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
			raise ConfigError('Project area not a valid CMSSW project area.')

		# Information about search order for software environment
		self.searchLoc = []
		if config.getState('init', detail = 'sandbox'):
			userPath = config.get('cmssw dir', '')
			if userPath != '':
				self.searchLoc.append(('CMSSW_DIR_USER', userPath))
			if self.scramEnv.get('RELEASETOP', None):
				projPath = os.path.normpath('%s/../../../../' % self.scramEnv['RELEASETOP'])
				self.searchLoc.append(('CMSSW_DIR_PRO', projPath))
		if self.searchLoc:
			utils.vprint('Local jobs will try to use the CMSSW software located here:', -1)
			for idx, (_name, value) in enumerate(self.searchLoc):
				utils.vprint(' %i) %s' % (idx + 1, value), -1)

		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.getPaths('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		self.configFiles = []
		# 'config file' is optional as soon as a prolog or epilog is active
		cfgDefault = utils.QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
		for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
			newPath = config.getWorkPath(os.path.basename(cfgFile))
			if not os.path.exists(newPath):
				# Copy the config file into the work directory on first use
				if not os.path.exists(cfgFile):
					raise ConfigError('Config file %r not found.' % cfgFile)
				shutil.copyfile(cfgFile, newPath)
			self.configFiles.append(newPath)

		# Check that for dataset jobs the necessary placeholders are in the config file
		self.prepare = config.getBool('prepare config', False)
		fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
		if self.dataSplitter is not None:
			if config.getState('init', detail = 'sandbox'):
				if len(self.configFiles) > 0:
					self.instrumentCfgQueue(self.configFiles, fragment, mustPrepare = True)
		else:
			self.eventsPerJob = config.get('events per job', '0')
			if config.getState('init', detail = 'sandbox') and self.prepare:
				self.instrumentCfgQueue(self.configFiles, fragment)
		if not os.path.exists(self._projectAreaTarball):
			# A missing tarball forces the sandbox initialisation
			config.setState(True, 'init', detail = 'sandbox')
		if config.getState('init', detail = 'sandbox'):
			if os.path.exists(self._projectAreaTarball):
				if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
					return
			# Generate CMSSW tarball
			if self.projectArea:
				utils.genTarball(self._projectAreaTarball, utils.matchFiles(self.projectArea, self.pattern))
			if self._projectAreaTarballSE:
				config.setState(True, 'init', detail = 'storage')