Пример #1
0
	def main():
		"""Build the configuration, prepare the work directory and run the workflow.

		Uses ``args``, ``parser`` and ``opts`` from the enclosing scope.
		Exits with os.EX_OK after printing the config help or after a requested
		job deletion / reset; otherwise the configured workflow is run.
		"""
		factory = createConfigFactory(configFile = args[0], additional = [OptsConfigFiller(parser)])
		cfg = factory.getConfig()
		logging_setup(cfg.changeView(setSections = ['logging']))

		# A missing work directory forces initialization and may be created on request
		if not os.path.exists(cfg.getWorkPath()):
			if not cfg.getState('init'):
				utils.vprint('Will force initialization of %s if continued!' % cfg.getWorkPath(), -1)
				cfg.setState(True, 'init')
			question = 'Do you want to create the working directory %s?' % cfg.getWorkPath()
			if cfg.getChoiceYesNo('workdir create', True, interactive = question):
				utils.ensureDirExists(cfg.getWorkPath(), 'work directory')

		# The workflow is instantiated first, afterwards the config settings are frozen
		workflowCfg = cfg.changeView(setSections = ['global'])
		wf = workflowCfg.getPlugin('workflow', 'Workflow:global', cls = Workflow).getInstance()
		factory.freezeConfig(writeConfig = cfg.getState('init', detail = 'config'))

		# Config help requested: dump the config to stdout and stop
		if opts.help_cfg or opts.help_scfg:
			cfg.write(sys.stdout, printDefault = opts.help_cfg, printUnused = False,
				printMinimal = opts.help_scfg, printSource = opts.help_cfg)
			sys.exit(os.EX_OK)

		# Job deletion is handled before job reset - either one ends the program
		if opts.delete:
			wf.jobManager.delete(wf.wms, opts.delete)
			sys.exit(os.EX_OK)
		if opts.reset:
			wf.jobManager.reset(wf.wms, opts.reset)
			sys.exit(os.EX_OK)

		# Nothing special requested: run the configured workflow
		wf.run()
Пример #2
0
    def __init__(self, config, name):
        """Set up the task from the ROOT path and executable given in the config.

        The ROOT path is read from the option "root path" and defaults to the
        environment variable ROOTSYS. An executable found below the "bin"
        directory of the ROOT path is flagged as built-in and is not sent.

        Raises ConfigError if no ROOT path could be determined.
        """
        # ROOT path: option 'root path', falling back to $ROOTSYS
        rootDefault = os.environ.get('ROOTSYS', '')
        self._rootpath = config.get('root path', rootDefault, persistent=True,
                                    onChange=changeInitNeeded('sandbox'))
        if not self._rootpath:
            raise ConfigError(
                'Either set environment variable "ROOTSYS" or set option "root path"!'
            )
        utils.vprint('Using the following ROOT path: %s' % self._rootpath, -1)

        # Executables bundled with ROOT live in <root path>/bin
        self._executable = config.get('executable',
                                      onChange=changeInitNeeded('sandbox'))
        bundledExe = os.path.join(self._rootpath, 'bin',
                                  self._executable.lstrip('/'))
        self.builtIn = os.path.exists(bundledExe)
        if self.builtIn:
            config.set('send executable', 'False')
            # store resolved built-in executable path?

        # Remaining setup is inherited from UserTask
        UserTask.__init__(self, config, name)
        self.updateErrorDict(utils.pathShare('gc-run.root.sh'))

        # Lib files needed by the executable are collected in this list
        self.libFiles = []
Пример #3
0
	def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare, mustPrepare):
		"""Find the config files which have to be copied to the work directory and / or prepared.

		config      -- config object, used to resolve paths inside the work directory
		cfgFiles    -- list of config file paths
		autoPrepare -- schedule preparation of config files which are not instrumented
		mustPrepare -- has the same effect as autoPrepare inside this method

		Prints a status table for all config files and returns a list of
		(source file, file in work directory, doPrepare) tuples.
		"""
		# Directory shared by all config files - only used to shorten the displayed names
		comPath = os.path.dirname(os.path.commonprefix(cfgFiles))

		cfgTodo = []
		cfgStatus = []
		for cfg in cfgFiles:
			cfg_new = config.getWorkPath(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			if cfg_new_exists:
				# The copy in the work directory decides about the instrumentation status
				isInstrumented = self._cfgIsInstrumented(cfg_new)
				doCopy = False
			else:
				isInstrumented = self._cfgIsInstrumented(cfg)
				doCopy = True
			doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
			doCopy = doCopy or doPrepare
			if doCopy:
				cfgTodo.append((cfg, cfg_new, doPrepare))
			# comPath is always a prefix of cfg. Slicing the prefix off (instead of
			# splitting on it) also works if there is no common directory: an empty
			# comPath would make str.split raise "ValueError: empty separator"
			cfgDisplay = cfg[len(comPath):].lstrip('/')
			cfgStatus.append({1: cfgDisplay, 2: cfg_new_exists,
				3: isInstrumented, 4: doPrepare})

		utils.vprint('', -1)
		utils.printTabular([(1, 'Config file'), (2, 'Work dir'), (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus, 'lccc')
		utils.vprint('', -1)
		return cfgTodo
Пример #4
0
	def jobCycle(self, wait = utils.wait):
		"""Perform the configured job actions once or, in continuous mode, until aborted.

		wait -- callable used for pausing; it is called with a value taken from
		        self.wms.getTimings() and its result marks whether a pause happened

		Each cycle handles the actions in self._actionList by their first letter
		(c = check, r = retrieve, s = submit), unless the free space in the work
		directory is below self._checkSpace.
		"""
		# Time of the last low disk space warning. It has to be initialized outside
		# of the loop - resetting it in every cycle would defeat the 5 minute throttle
		lastSpaceMsg = 0
		while True:
			didWait = False
			# Check whether wms can submit
			if not self.wms.canSubmit(self.task.wallTime, self._submitFlag):
				self._submitFlag = False
			# Check free disk space
			if (self._checkSpace > 0) and utils.freeSpace(self._workDir) < self._checkSpace:
				# Warn at most once every 5 minutes
				if time.time() - lastSpaceMsg > 5 * 60:
					utils.vprint('Not enough space left in working directory', -1, True)
					lastSpaceMsg = time.time()
			else:
				for action in map(str.lower, self._actionList):
					if action.startswith('c') and not utils.abort():   # check for jobs
						if self.jobManager.check(self.wms):
							didWait = wait(self.wms.getTimings()[1])
					elif action.startswith('r') and not utils.abort(): # retrieve finished jobs
						if self.jobManager.retrieve(self.wms):
							didWait = wait(self.wms.getTimings()[1])
					elif action.startswith('s') and not utils.abort() and self._submitFlag:
						if self.jobManager.submit(self.wms):
							didWait = wait(self.wms.getTimings()[1])

			# quit if abort flag is set or not in continuous mode
			if utils.abort() or not self.runContinuous:
				break
			# idle timeout
			if not didWait:
				wait(self.wms.getTimings()[0])
Пример #5
0
	def check(self, wms, maxsample = 100):
		"""Check the jobs which are currently being processed and react to the results.

		wms       -- workload management interface handed to the job checks
		maxsample -- sample size used in continuous mode (otherwise -1 is passed to self.sample)

		Jobs reported as timed out are cancelled, task interventions are processed
		and the abort flag is set once all jobs reached an end state and the task
		can finish. Returns False if the job check was aborted, otherwise the
		change flag (True if there were changes or timeouts).
		"""
		jobList = self.sample(self.jobDB.getJobs(ClassSelector(JobClass.PROCESSING)), utils.QM(self.continuous, maxsample, -1))

		# Check jobs in the joblist and return changes, timeouts and successfully reported jobs
		(change, timeoutList, reported) = self.checkJobList(wms, jobList)
		if change is None: # neither True or False => abort
			return False

		# Cancel jobs which took too long
		if len(timeoutList):
			change = True
			# Parenthesised form gives the same output with the print statement and the print function
			print('\nTimeout for the following jobs:')
			self.cancel(wms, timeoutList)

		# Process task interventions
		self.processIntervention(wms, self._task.getIntervention())

		# Quit when all jobs are finished
		if self.jobDB.getJobsN(ClassSelector(JobClass.ENDSTATE)) == len(self.jobDB):
			self.logDisabled()
			self._eventhandler.onTaskFinish(len(self.jobDB))
			if self._task.canFinish():
				utils.vprint('Task successfully completed. Quitting grid-control!', -1, True)
				utils.abort(True)

		return change
Пример #6
0
    def display(self):
        """Print a table with one row per category and job counts in four coarse states.

        Job states without an entry in the coarse state map are counted as
        'WAITING'. The description keys of the categories form the leading,
        centered columns of the table.
        """
        (catStateDict, catDescDict,
         _unused) = CategoryReport._getCategoryStateSummary(self)
        coarseState = {
            Job.SUCCESS: 'SUCCESS',
            Job.FAILED: 'FAILED',
            Job.RUNNING: 'RUNNING',
            Job.DONE: 'RUNNING'
        }
        rows = []
        descKeys = set()
        for catKey in catDescDict:
            row = dict(catDescDict[catKey])
            # Only the description keys are collected - state columns are added below
            descKeys.update(row.keys())
            stateCounts = catStateDict[catKey]
            for stateKey in stateCounts:
                column = coarseState.get(stateKey, 'WAITING')
                row[column] = row.get(column, 0) + stateCounts[stateKey]
            rows.append(row)

        stateColumns = ['WAITING', 'RUNNING', 'FAILED', 'SUCCESS']

        def formatCount(value):
            return '%7d' % parseStr(value, int, 0)

        def makeHeader(key):
            return (key, key)

        utils.vprint(level=-1)
        utils.printTabular(lmap(makeHeader, sorted(descKeys) + stateColumns),
                           rows,
                           'c' * len(descKeys),
                           fmt=dict.fromkeys(stateColumns, formatCount))
        utils.vprint(level=-1)
Пример #7
0
	def setupJobParameters(self, config, pm):
		"""Create the dataset provider / splitter and register the dataset parameter source.

		config -- config object; extended by the section 'dataset' and tagged with this object
		pm     -- not used inside this method

		Returns early (with dataSplitter and dataRefresh set to None) if no dataset
		is configured. Raises UserError if the splitter reports zero jobs.
		"""
		config = config.addSections(['dataset']).addTags([self])
		(self.dataSplitter, self.dataRefresh) = (None, None)
		self.dataset = config.get('dataset', '').strip()
		if self.dataset == '':
			return
		# Settings are written with override = False
		for (option, value) in [('se output pattern', '@NICK@_job_@MY_JOBID@_@X@'), ('default lookup', 'DATASETNICK')]:
			config.set(option, value, override = False)

		providerName = config.get('dataset provider', 'ListProvider')
		provider = DataProvider.create(config, self.dataset, providerName)
		requestedSplitter = DataSplitter.getClass(config.get('dataset splitter', 'FileBoundarySplitter'))
		checkedSplitter = provider.checkSplitter(requestedSplitter)
		self.dataSplitter = checkedSplitter(config)
		self.checkSE = config.getBool('dataset storage check', True, onChange = None)

		# The dataset parameter source is registered under the name 'data'
		dataSource = DataParameterSource(config.getWorkPath(), 'data',
			provider, self.dataSplitter, self.initDataProcessor())
		DataParameterSource.datasetsAvailable['data'] = dataSource

		# Dataset refresh rate - values which are not positive switch the interval to 0
		self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
		if not (self.dataRefresh > 0):
			dataSource.resyncSetup(interval = 0)
		else:
			dataSource.resyncSetup(interval = max(self.dataRefresh, provider.queryLimit()))
			utils.vprint('Dataset source will be queried every %s' % utils.strTime(self.dataRefresh), -1)

		# SIGUSR2 forces a resync of the dataset source
		def externalRefresh(sig, frame):
			dataSource.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, externalRefresh)

		if self.dataSplitter.getMaxJobs() == 0:
			raise UserError('There are no events to process')
Пример #8
0
	def getSubmissionJobs(self, maxsample, static = {'showBlocker': True}):
		"""Return the job numbers which should be submitted next.

		maxsample -- upper limit for the number of jobs in continuous mode
		static    -- NOTE: the mutable default dict is shared between calls on purpose;
		             it remembers whether the "blocked" message still has to be shown

		The result is either a sample (if self.doShuffle is set) or the lowest
		job numbers of all ready jobs which pass the retry limit and the task veto.
		"""
		# Get list of submittable jobs
		readyList = self.jobDB.getJobs(ClassSelector(JobClass.READY))
		retryOK = readyList
		defaultJob = Job()
		# A negative maxRetry disables the retry limit
		if self.maxRetry >= 0:
			retryOK = filter(lambda x: self.jobDB.get(x, defaultJob).attempt - 1 < self.maxRetry, readyList)
		modOK = filter(self._task.canSubmit, readyList)
		# NOTE(review): len() is applied to the filter results below, which presumably
		# relies on filter returning a list (Python 2) - confirm before porting
		jobList = set.intersection(set(retryOK), set(modOK))

		if static['showBlocker'] and len(readyList) > 0 and len(jobList) == 0: # No submission but ready jobs
			err = []
			# presumably utils.QM(cond, a, b) yields a if cond else b - verify against utils
			err += utils.QM(len(retryOK) > 0 and len(modOK) == 0, [], ['have hit their maximum number of retries'])
			err += utils.QM(len(retryOK) == 0 and len(modOK) > 0, [], ['are vetoed by the task module'])
			utils.vprint('All remaining jobs %s!' % str.join(utils.QM(retryOK or modOK, ' or ', ' and '), err), -1, True)
		# Show the message again only after the blocked situation has been left
		static['showBlocker'] = not (len(readyList) > 0 and len(jobList) == 0)

		# Determine number of jobs to submit
		submit = len(jobList)
		# Limits which are not positive are ignored
		if self.inQueue > 0:
			submit = min(submit, self.inQueue - self.jobDB.getJobsN(ClassSelector(JobClass.ATWMS)))
		if self.inFlight > 0:
			submit = min(submit, self.inFlight - self.jobDB.getJobsN(ClassSelector(JobClass.PROCESSING)))
		if self.continuous:
			submit = min(submit, maxsample)
		# The subtractions above can yield negative numbers
		submit = max(submit, 0)

		if self.doShuffle:
			return self.sample(jobList, submit)
		else:
			return sorted(jobList)[:submit]
Пример #9
0
	def __init__(self, config, wmsName):
		"""Set up the work directory files, the access token and the storage managers of the backend.

		config  -- config object used to resolve work directory paths and plugins
		wmsName -- name of the backend, passed on to WMS.__init__
		"""
		WMS.__init__(self, config, wmsName)
		clsName = self.__class__.__name__
		# The class name is only shown if it differs from the backend name
		if self.wmsName == clsName.upper():
			utils.vprint('Using batch system: %s' % self.wmsName, -1)
		else:
			utils.vprint('Using batch system: %s (%s)' % (clsName, self.wmsName), -1)

		self.errorLog = config.getWorkPath('error.tar')
		self._runlib = config.getWorkPath('gc-run.lib')
		# The run library is created once from the shared template with the version filled in
		if not os.path.exists(self._runlib):
			runlibFile = SafeFile(self._runlib, 'w')
			template = SafeFile(utils.pathShare('gc-run.lib')).read()
			version = __import__('grid_control').__version__
			runlibFile.write(template.replace('__GC_VERSION__', version))
			runlibFile.close()
		self._outputPath = config.getWorkPath('output')
		utils.ensureDirExists(self._outputPath, 'output directory')
		self._failPath = config.getWorkPath('fail')

		# Initialise access token, broker and storage manager
		self._token = config.getCompositePlugin(['proxy', 'access token'], 'TrivialAccessToken',
			'MultiAccessToken', cls = AccessToken, inherit = True, tags = [self])

		# Input direction: UI -> SE -> WN
		self.smSEIn = config.getPlugin('se input manager', 'SEStorageManager', cls = StorageManager,
			tags = [self], pargs = ('se', 'se input', 'SE_INPUT'))
		self.smSBIn = config.getPlugin('sb input manager', 'LocalSBStorageManager', cls = StorageManager,
			tags = [self], pargs = ('sandbox', 'sandbox', 'SB_INPUT'))
		# Output direction: UI <- SE <- WN
		self.smSEOut = config.getPlugin('se output manager', 'SEStorageManager', cls = StorageManager,
			tags = [self], pargs = ('se', 'se output', 'SE_OUTPUT'))
		self.smSBOut = None

		self.fileNamesEnvironment = config.getBool("file names environment", True, onChange = None)
Пример #10
0
	def __init__(self, config, name):
		"""Create task, monitoring, backend, job manager and GUI for this workflow.

		config -- config object used to read the options and to instantiate the plugins
		name   -- passed on to NamedObject.__init__
		"""
		NamedObject.__init__(self, config, name)
		self._workDir = config.getWorkPath()

		# Task module
		taskCls = config.getClass(['task', 'module'], cls = TaskModule, tags = [self])
		self.task = taskCls.getInstance()
		for message in ('Current task ID: %s' % self.task.taskID, 'Task started on %s' % self.task.taskDate):
			utils.vprint(message, -1)

		# Monitoring module
		monitorFactory = ClassFactory(config, ('monitor', 'scripts'), ('monitor manager', 'MultiMonitor'),
			cls = Monitoring, tags = [self.task])
		self.monitor = monitorFactory.getInstance(self.task)

		# Workload management interface
		wmsFactory = ClassFactory(config, ('backend', 'grid'), ('backend manager', 'MultiWMS'),
			cls = WMS, tags = [self.task])
		self.wms = wmsFactory.getInstance()

		# Job database
		self.jobManager = config.getClass('job manager', 'SimpleJobManager',
			cls = JobManager, tags = [self.task, self.wms]).getInstance(self.task, self.monitor)

		# Work package
		self.wms.deployTask(self.task, self.monitor)

		# The job related options are read from a clone of the config
		jobsConfig = config.clone()
		self._actionList = jobsConfig.getList('jobs', 'action', ['check', 'retrieve', 'submit'], onChange = None)
		self.runContinuous = jobsConfig.getBool('jobs', 'continuous', False, onChange = None)

		self._checkSpace = config.getInt('workdir space', 10, onChange = None)
		self._submitFlag = config.getBool('submission', True, onChange = None)
		self._gui = config.getClass('gui', 'SimpleConsole', cls = GUI, onChange = None).getInstance(config, self)
Пример #11
0
	def __init__(self, remoteType="", **kwargs):
		"""Prepare the command / copy templates for the requested connection type and test the connection.

		remoteType -- name of an entry of self.RPHType (case insensitive)
		kwargs     -- "host" is required for SSH and GSISSH connections;
		              "cmdargs", "cpargs" and "args" are optional and default to ""

		Raises ConfigError for an unknown type or a missing host and
		CondorProcessError if the test command does not return 0.
		"""
		self.cmd = False
		# Look up the templates of the requested remote connection
		try:
			self.remoteType = getattr(self.RPHType, remoteType.upper())
			template = self.RPHTemplate[self.remoteType]
			self.cmd = template["command"]
			self.copy = template["copy"]
			self.path = template["path"]
			self.argFormat = template["argFormat"]
		except Exception:
			raise ConfigError("Request to initialize RemoteProcessHandler of unknown type: %s" % remoteType)
		# SSH based connections need a destination of type: [user@]host
		needsHost = self.remoteType==self.RPHType.SSH or self.remoteType==self.RPHType.GSISSH
		if needsHost:
			try:
				hostSubst = { "rhost" : kwargs["host"] }
				self.cmd = self.cmd % hostSubst
				self.copy = self.copy % hostSubst
				self.host = kwargs["host"]
			except Exception:
				raise ConfigError("Request to initialize RemoteProcessHandler of type %s without remote host." % self.RPHType.enumList[self.remoteType])
		# Fill in the default arguments for all commands
		cmdSubst = { "cmdargs" : kwargs.get("cmdargs",""), "args" : kwargs.get("args","") }
		self.cmd = self.cmd % cmdSubst
		copySubst = { "cpargs" : kwargs.get("cpargs",""), "args" : kwargs.get("args","") }
		self.copy = self.copy % copySubst
		# Run "exit" once to validate the connection
		testProc = LoggedProcess(self.cmd % { "cmd" : "exit"})
		exitCode = testProc.getAll()[0]
		if exitCode != 0:
			raise CondorProcessError('Validation of remote connection failed!', testProc)
		vprint('Remote interface initialized:\n	Cmd: %s\n	Cp : %s' % (self.cmd,self.copy), level=2)
Пример #12
0
	def __init__(self, remoteType="", **kwargs):
		"""Configure the remote command and copy templates, then validate the connection.

		remoteType -- name of an entry of self.RPHType (case insensitive)
		kwargs     -- "host" has to be given for SSH / GSISSH; "cmdargs",
		              "cpargs" and "args" are optional (default: "")

		Raises ConfigError if the type is unknown or the host is missing,
		CondorProcessError if running "exit" through the connection fails.
		"""
		self.cmd=False
		# Select the settings of the requested connection type
		try:
			self.remoteType = getattr(self.RPHType, remoteType.upper())
			settings = self.RPHTemplate[self.remoteType]
			(self.cmd, self.copy) = (settings["command"], settings["copy"])
			(self.path, self.argFormat) = (settings["path"], settings["argFormat"])
		except Exception:
			raise ConfigError("Request to initialize RemoteProcessHandler of unknown type: %s" % remoteType)
		# The destination of SSH based connections has the form [user@]host
		if self.remoteType==self.RPHType.SSH or self.remoteType==self.RPHType.GSISSH:
			try:
				self.cmd = self.cmd % { "rhost" : kwargs["host"] }
				self.copy = self.copy % { "rhost" : kwargs["host"] }
				self.host = kwargs["host"]
			except Exception:
				typeName = self.RPHType.enumList[self.remoteType]
				raise ConfigError("Request to initialize RemoteProcessHandler of type %s without remote host." % typeName)
		# Default arguments apply to all commands
		extraArgs = kwargs.get
		self.cmd = self.cmd % { "cmdargs" : extraArgs("cmdargs",""), "args" : extraArgs("args","") }
		self.copy = self.copy % { "cpargs" : extraArgs("cpargs",""), "args" : extraArgs("args","") }
		# The connection is tested once by running "exit"
		proc = LoggedProcess(self.cmd % { "cmd" : "exit"})
		(ret, _out, _err) = proc.getAll()
		if ret != 0:
			raise CondorProcessError('Validation of remote connection failed!', proc)
		vprint('Remote interface initialized:\n	Cmd: %s\n	Cp : %s' % (self.cmd,self.copy), level=2)
Пример #13
0
 def _displaySetup(self, dsPath, head):
     """Print the mapping between dataset nicknames and their settings.

     dsPath -- dataset file; nothing is printed if it does not exist
     head   -- column definition handed to utils.printTabular
     """
     if not os.path.exists(dsPath):
         return
     blocks = DataProvider.loadFromFile(dsPath).getBlocks()
     nickNames = set(block[DataProvider.Nickname] for block in blocks)
     utils.vprint('Mapping between nickname and other settings:\n', -1)
     rows = []
     for nick in sorted(nickNames):
         lumiEntries = formatLumi(
             self._nmLumi.lookup(nick, '', is_selector=False))
         # More than 4 lumi filter entries are abbreviated to first ... last
         if len(lumiEntries) <= 4:
             lumiDisplay = str.join(', ', lumiEntries)
         else:
             lumiDisplay = '%s ... %s (%d entries)' % (
                 lumiEntries[0], lumiEntries[-1], len(lumiEntries))
         cfgFiles = self._nmCfg.lookup(nick, '', is_selector=False)
         row = {
             0: nick,
             1: str.join(', ', imap(os.path.basename, cfgFiles)),
             2: lumiDisplay
         }
         # Every lookup source can add its own information for this nickname
         nickInfo = {'DATASETNICK': nick}
         for source in self._pm.lookupSources:
             source.fillParameterInfo(None, nickInfo)
         row.update(nickInfo)
         rows.append(row)
     utils.printTabular(head, rows, 'cl')
     utils.vprint(level=-1)
Пример #14
0
    def _cfgFindUninitialized(self, config, cfgFiles, autoPrepare,
                              mustPrepare):
        """Find the config files which have to be copied and / or prepared.

        config      -- config object, used to resolve work directory paths
        cfgFiles    -- list of config file paths
        autoPrepare -- schedule preparation of files which are not instrumented
        mustPrepare -- has the same effect as autoPrepare inside this method

        Prints a status table for all config files and returns a list of
        (source file, file in work directory, doPrepare) tuples.
        """
        # Directory shared by all config files - only shortens displayed names
        comPath = os.path.dirname(os.path.commonprefix(cfgFiles))

        cfgTodo = []
        cfgStatus = []
        for cfg in cfgFiles:
            cfg_new = config.getWorkPath(os.path.basename(cfg))
            cfg_new_exists = os.path.exists(cfg_new)
            if cfg_new_exists:
                # The work directory copy decides about the instrumentation
                isInstrumented = self._cfgIsInstrumented(cfg_new)
                doCopy = False
            else:
                isInstrumented = self._cfgIsInstrumented(cfg)
                doCopy = True
            doPrepare = (mustPrepare or autoPrepare) and not isInstrumented
            doCopy = doCopy or doPrepare
            if doCopy:
                cfgTodo.append((cfg, cfg_new, doPrepare))
            # comPath is always a prefix of cfg. Slicing the prefix off also
            # works without a common directory, where splitting on the empty
            # comPath would raise "ValueError: empty separator"
            cfgDisplay = cfg[len(comPath):].lstrip('/')
            cfgStatus.append({
                1: cfgDisplay,
                2: cfg_new_exists,
                3: isInstrumented,
                4: doPrepare
            })

        utils.vprint('', -1)
        utils.printTabular([(1, 'Config file'), (2, 'Work dir'),
                            (3, 'Instrumented'), (4, 'Scheduled')], cfgStatus,
                           'lccc')
        utils.vprint('', -1)
        return cfgTodo
Пример #15
0
 def _discover(self, discoverFun, cached=True):
     if not cached or (self._itemsDiscovered is False):
         self._itemsDiscovered = discoverFun()
         msg = 'an unknown number of'
         if self._itemsDiscovered is not None:
             msg = str(len(self._itemsDiscovered))
         utils.vprint('Broker discovered %s %s' % (msg, self._itemName))
     return self._itemsDiscovered
Пример #16
0
	def _discover(self, discoverFun, cached = True):
		if not cached or (self._itemsDiscovered == False):
			self._itemsDiscovered = discoverFun()
			msg = 'an unknown number of'
			if self._itemsDiscovered != None:
				msg = str(len(self._itemsDiscovered))
			utils.vprint('Broker discovered %s %s' % (msg, self._itemName))
		return self._itemsDiscovered
Пример #17
0
	def getCMSDatasets(self):
		"""Return the list of dataset paths described by self.datasetPath.

		A path without '*' is returned as the only list entry. Otherwise the
		wildcard is resolved via getCMSDatasetsImpl and DatasetError is raised
		if nothing matches.
		"""
		if '*' not in self.datasetPath:
			return [self.datasetPath]
		matches = list(self.getCMSDatasetsImpl(self.datasetPath))
		if len(matches) == 0:
			raise DatasetError('No datasets selected by DBS wildcard %s !' % self.datasetPath)
		utils.vprint('DBS dataset wildcard selected:\n\t%s\n' % str.join('\n\t', matches), -1)
		return matches # List of resolved datasetPaths
Пример #18
0
    def __init__(self, config, name):
        """Translate the nickname based options into regular settings and print the mapping.

        Reads "nickname config", "nickname constants" and "nickname lumi filter",
        writes the derived "config file" / "lumi filter" settings, prints a
        table of the settings per nickname and then calls CMSSW.__init__.

        Raises ConfigError if "config file" is used together with
        "nickname config" or if "lumi filter" is set.
        """
        columns = [(0, "Nickname")]

        # Mapping between nickname and config files
        def parseCfgList(value):
            return map(str.strip, value.split(","))

        def formatCfgList(value):
            return str.join(",", value)

        rawCfgSetting = config.get("nickname config", "")
        self.nmCfg = config.getDict("nickname config", {}, parser=parseCfgList, str=formatCfgList)[0]
        if rawCfgSetting:
            if "config file" in config.getOptions():
                raise ConfigError("Please use 'nickname config' instead of 'config file'")
            config.set("config file", str.join("\n", utils.flatten(self.nmCfg.values())))
            columns.append((1, "Config file"))

        # Mapping between nickname and constants - empty values are stored as ""
        self.nmCName = map(str.strip, config.get("nickname constants", "").split())
        self.nmConst = {}
        for var in self.nmCName:
            for (nick, value) in config.getDict(var, {})[0].items():
                self.nmConst.setdefault(nick, {})[var] = value or ""
            columns.append((var, var))

        # Mapping between nickname and lumi filter
        if "lumi filter" in config.getOptions():
            raise ConfigError("Please use 'nickname lumi filter' instead of 'lumi filter'")

        def parseLumi(value):
            return formatLumi(parseLumiFilter(value))

        self.nmLumi = config.getDict("nickname lumi filter", {}, parser=parseLumi)[0]
        if self.nmLumi:
            # Each dataset gets the lumi filter belonging to its nickname
            for datasetLine in config.get("dataset", "").splitlines():
                (datasetNick, _provider, _expr) = DataProvider.parseDatasetExpr(config, datasetLine, None)
                nickLumi = str.join(",", utils.flatten(fromNM(self.nmLumi, datasetNick, [])))
                config.set("dataset %s" % datasetNick, "lumi filter", nickLumi)
            config.set("lumi filter", str.join(",", self.nmLumi.get(None, [])))
            columns.append((2, "Lumi filter"))

        utils.vprint("Mapping between nickname and other settings:\n", -1)

        def report():
            # One row per nickname known to any of the three mappings
            allNicks = set(self.nmCfg.keys() + self.nmConst.keys() + self.nmLumi.keys())
            for nick in sorted(allNicks):
                cfgNames = str.join(", ", map(os.path.basename, self.nmCfg.get(nick, "")))
                row = {0: nick, 1: cfgNames, 2: self.displayLumi(self.nmLumi.get(nick, ""))}
                yield utils.mergeDicts([row, self.nmConst.get(nick, {})])

        utils.printTabular(columns, report(), "cl")
        utils.vprint(level=-1)
        CMSSW.__init__(self, config, name)
Пример #19
0
	def logDisabled(self):
		"""Write the numbers of all disabled jobs to self.disableLog and notify the user.

		The file is rewritten on every call, one job per line. The user is only
		notified if there are disabled jobs.
		Raises RuntimeError if the file could not be written.
		"""
		disabled = self.jobDB.getJobs(ClassSelector(JobClass.DISABLED))
		try:
			fp = open(self.disableLog, 'w')
			# try / finally closes the file handle even if the write fails
			# (explicit form instead of "with" to stay independent of the Python version)
			try:
				fp.write(str.join('\n', map(str, disabled)))
			finally:
				fp.close()
		except Exception:
			raise RuntimeError('Could not write disabled jobs to file %s!' % self.disableLog)
		if len(disabled) > 0:
			utils.vprint('There are %d disabled jobs in this task!' % len(disabled), -1, True)
			utils.vprint('Please refer to %s for a complete list.' % self.disableLog, -1, True, once = True)
Пример #20
0
	def _checkTimeleft(self, neededTime): # check for time left
		delta = time.time() - self._lastUpdate
		timeleft = max(0, self._getTimeleft(cached = True) - delta)
		# recheck proxy => after > 30min have passed or when time is running out (max every 5 minutes)
		if (delta > self._minQueryTime) or (timeleft < neededTime and delta > self._maxQueryTime):
			self._lastUpdate = time.time()
			timeleft = self._getTimeleft(cached = False)
			verbosity = QM(timeleft < neededTime, -1, 0)
			utils.vprint('The proxy now has %s left' % utils.strTime(timeleft), verbosity, printTime = True)
		return timeleft >= neededTime
Пример #21
0
	def canSubmit(self, neededTime, canCurrentlySubmit):
		"""Return whether the proxy lifetime allows the submission of jobs.

		neededTime         -- time needed in addition to self._lowerLimit
		canCurrentlySubmit -- the lifetime warning is only given (and False
		                      returned) if this flag is set

		Raises UserError if the time left is below self._lowerLimit.
		"""
		if not self._checkTimeleft(self._lowerLimit):
			raise UserError('Your proxy only has %d seconds left! (Required are %s)' %
				(self._getTimeleft(cached = True), utils.strTime(self._lowerLimit)))
		# This check is always performed - even if submission is disabled already
		enoughForJobs = self._checkTimeleft(self._lowerLimit + neededTime)
		if enoughForJobs or not canCurrentlySubmit:
			return True
		utils.vprint('Proxy lifetime (%s) does not meet the proxy and walltime (%s) requirements!' %
			(utils.strTime(self._getTimeleft(cached = False)), utils.strTime(self._lowerLimit + neededTime)), -1, printTime = True)
		utils.vprint('Disabling job submission', -1, printTime = True)
		return False
Пример #22
0
	def __init__(self, config, wmsName):
		"""Set up the Condor/GlideInWMS backend from the config.

		config  -- config object; the backend options are read from the sections
		           returned by self._getSections
		wmsName -- name of the backend, passed on to BasicWMS.__init__

		Reads the debug log, task id, JDL and pool settings, prepares the pool
		interfaces, determines the sandbox path and the history file and
		creates the site broker.
		"""
		utils.vprint('Using batch system: Condor/GlideInWMS', -1)
		### WMSname=condor is a hardcoded hack until interface is clear
		BasicWMS.__init__(self, config, wmsName, 'condor')
		# special debug out/messages/annotations - may have noticeable effect on storage and performance!
		# self.debug is either a file opened in append mode or False
		if config.get( self._getSections("backend"), "debugLog", ""):
			self.debug=open(config.get( self._getSections("backend"), "debugLog", ""),'a')
		else:
			self.debug=False
		######
		# The default task id is derived from the current time; the option is read with persistent = True
		self.taskID = config.get('condor', 'task id', md5(str(time.time())).hexdigest(), persistent = True) # FIXME!
		self.debugOut("""
		
		#############################
		Initialized Condor/GlideInWMS
		#############################
		Config: %s
		taskID: %s
		Name:   %s
		#############################
		
		"""%(config.confName,self.taskID,wmsName))
		# finalize config state by reading values or setting to defaults
		self.settings={
			"jdl": {
				"Universe" : config.get( self._getSections("backend"), "Universe", "vanilla"),
				"NotifyEmail" : config.get( self._getSections("backend"), "NotifyEmail", ""),
				"ClassAdData" : config.getList( self._getSections("backend"), "ClassAdData",[]),
				"JDLData" : config.getList( self._getSections("backend"), "JDLData",[])
				},
			"pool" : {
				"hosts" : config.getList( self._getSections("backend"), "PoolHostList",[])
				}
			}
		# prepare interfaces for local/remote/ssh pool access
		self._initPoolInterfaces(config)
		# load keys for condor pool ClassAds
		self.poolReqs  = config.getDict(self._getSections("backend"), 'poolArgs req', {})[0]
		self.poolQuery = config.getDict(self._getSections("backend"), 'poolArgs query', {})[0]
		self._formatStatusReturnQuery(config)
		# Sandbox base path where individual job data is stored, staged and returned to
		self.sandPath = config.getPath(self._getSections("local"), 'sandbox path', config.getWorkPath('sandbox'), mustExist = False)
		# history query is faster with split files - check if and how this is used
		# default condor_history command works WITHOUT explicitly specified file
		self.historyFile = None
		# NOTE(review): self.remoteType and self.configValExec are not set in this method -
		# presumably by _initPoolInterfaces above; confirm
		if self.remoteType == poolType.LOCAL and commands.getoutput( self.configValExec + " ENABLE_HISTORY_ROTATION").lower() == "true":
			self.historyFile = commands.getoutput( self.configValExec + " HISTORY")
			# Fall back to no explicit history file if the reported path is not a file
			if not os.path.isfile(self.historyFile):
				self.historyFile = None
		# broker for selecting Sites
		self.brokerSite = config.getClass('site broker', 'UserBroker', cls = Broker,
			tags = [self]).getInstance('sites', 'sites', self.getSites)
		self.debugFlush()
Пример #23
0
    def __init__(self, config, wmsName):
        """Prepare work directory files, access token and storage managers.

        config  -- config object used for work directory paths and plugins
        wmsName -- name of the backend, passed on to WMS.__init__
        """
        WMS.__init__(self, config, wmsName)
        # Mention the class name only if it differs from the backend name
        if self.wmsName != self.__class__.__name__.upper():
            batchInfo = 'Using batch system: %s (%s)' % (
                self.__class__.__name__, self.wmsName)
        else:
            batchInfo = 'Using batch system: %s' % self.wmsName
        utils.vprint(batchInfo, -1)

        self.errorLog = config.getWorkPath('error.tar')
        self._runlib = config.getWorkPath('gc-run.lib')
        # Create the run library from the shared template (only once)
        if not os.path.exists(self._runlib):
            target = SafeFile(self._runlib, 'w')
            source = SafeFile(utils.pathShare('gc-run.lib')).read()
            gcVersion = __import__('grid_control').__version__
            target.write(source.replace('__GC_VERSION__', gcVersion))
            target.close()
        self._outputPath = config.getWorkPath('output')
        utils.ensureDirExists(self._outputPath, 'output directory')
        self._failPath = config.getWorkPath('fail')

        # Initialise access token, broker and storage manager
        self._token = config.getCompositePlugin(
            ['proxy', 'access token'], 'TrivialAccessToken',
            'MultiAccessToken', cls=AccessToken, inherit=True, tags=[self])

        # Input direction: UI -> SE -> WN
        self.smSEIn = config.getPlugin(
            'se input manager', 'SEStorageManager', cls=StorageManager,
            tags=[self], pargs=('se', 'se input', 'SE_INPUT'))
        self.smSBIn = config.getPlugin(
            'sb input manager', 'LocalSBStorageManager', cls=StorageManager,
            tags=[self], pargs=('sandbox', 'sandbox', 'SB_INPUT'))
        # Output direction: UI <- SE <- WN
        self.smSEOut = config.getPlugin(
            'se output manager', 'SEStorageManager', cls=StorageManager,
            tags=[self], pargs=('se', 'se output', 'SE_OUTPUT'))
        self.smSBOut = None

        self.fileNamesEnvironment = config.getBool(
            "file names environment", True, onChange=None)
Пример #24
0
	def _CreateSocket(self, duration = 60):
		"""Start the SSH ControlMaster process and wait for its socket to appear.

		duration -- argument of the remote "sleep" command run by the master process

		Returns False if self.sshLink does not exist after 10 seconds.
		"""
		masterCmd = " ".join([self.cmd, self.defaultArgs, "-o ControlMaster=yes",
			self.socketArgsDef, self.remoteHost, self._argFormat("sleep %d" % duration)])
		self.__ControlMaster = LoggedProcess(masterCmd)
		polls = 0 # number of 0.5 second polling intervals waited so far
		while not os.path.exists(self.sshLink):
			time.sleep(0.5)
			polls += 1
			if polls == 10: # 5 seconds
				vprint("SSH socket still not available after 5 seconds...\n%s" % self.sshLink, level=1)
				vprint('Socket process: %s' % (self.__ControlMaster.cmd), level=2)
			if polls == 20: # 10 seconds
				return False
Пример #25
0
	def _CreateSocket(self, duration = 60):
		"""Launch the SSH ControlMaster process and poll until its socket exists.

		duration -- argument of the remote "sleep" command run by the master process

		A notice is given after 5 seconds; after 10 seconds without the
		socket at self.sshLink the method gives up and returns False.
		"""
		cmdParts = [self.cmd, self.defaultArgs, "-o ControlMaster=yes", self.socketArgsDef]
		cmdParts.extend([self.remoteHost, self._argFormat("sleep %d" % duration)])
		self.__ControlMaster = LoggedProcess(" ".join(cmdParts))
		waited = 0
		while not os.path.exists(self.sshLink):
			time.sleep(0.5)
			waited += 0.5
			if waited == 10:
				return False
			elif waited == 5:
				vprint("SSH socket still not available after 5 seconds...\n%s" % self.sshLink, level=1)
				vprint('Socket process: %s' % (self.__ControlMaster.cmd), level=2)
Пример #26
0
	def __init__(self, config, wmsName):
		"""Set up the Condor/GlideInWMS backend from the config.

		config  -- config object providing the backend options
		wmsName -- name of the backend, passed on to BasicWMS.__init__

		Reads the debug log, task id, JDL and pool settings, prepares the pool
		interfaces, determines the sandbox path and the history file and
		creates the site broker.
		"""
		utils.vprint('Using batch system: Condor/GlideInWMS', -1)
		BasicWMS.__init__(self, config, wmsName)
		# special debug out/messages/annotations - may have noticeable effect on storage and performance!
		debugLogFN = config.get('debugLog', '')
		# self.debug is either a file opened in append mode or False
		self.debug = False
		if debugLogFN:
			self.debug = open(debugLogFN, 'a')
		######
		# The default task id is derived from the current time; the option is read with persistent = True
		self.taskID = config.get('task id', md5(str(time.time())).hexdigest(), persistent = True) # FIXME!
		self.debugOut("""

		#############################
		Initialized Condor/GlideInWMS
		#############################
		Config: %s
		taskID: %s
		Name:   %s
		#############################

		"""%(config.getConfigName(),self.taskID,wmsName))
		# finalize config state by reading values or setting to defaults
		self.settings={
			'jdl': {
				'Universe' : config.get('Universe', 'vanilla'),
				'NotifyEmail' : config.get('NotifyEmail', ''),
				'ClassAdData' : config.getList('ClassAdData',[]),
				'JDLData' : config.getList('JDLData',[])
				},
			'pool' : {
				'hosts' : config.getList('PoolHostList',[])
				}
			}
		# prepare interfaces for local/remote/ssh pool access
		self._initPoolInterfaces(config)
		# load keys for condor pool ClassAds
		self.poolReqs  = config.getDict('poolArgs req', {})[0]
		self.poolQuery = config.getDict('poolArgs query', {})[0]
		self._formatStatusReturnQuery(config)
		# Sandbox base path where individual job data is stored, staged and returned to
		self.sandPath = config.getPath('sandbox path', config.getWorkPath('sandbox'), mustExist = False)
		# history query is faster with split files - check if and how this is used
		# default condor_history command works WITHOUT explicitly specified file
		self.historyFile = None
		# NOTE(review): self.remoteType and self.configValExec are not set in this method -
		# presumably by _initPoolInterfaces above; confirm
		if self.remoteType == PoolType.LOCAL and getoutput( self.configValExec + ' ENABLE_HISTORY_ROTATION').lower() == 'true':
			self.historyFile = getoutput( self.configValExec + ' HISTORY')
			# Fall back to no explicit history file if the reported path is not a file
			if not os.path.isfile(self.historyFile):
				self.historyFile = None
		# broker for selecting Sites
		self.brokerSite = config.getPlugin('site broker', 'UserBroker', cls = Broker,
			tags = [self], pargs = ('sites', 'sites', self.getSites))
		self.debugFlush()
Пример #27
0
	def _setupJobParameters(self, config):
		"""Configure dataset provider, splitter and the dataset parameter source.

		Sets self.dataSplitter, self._data_refresh, self._forceRefresh and - if a
		dataset is configured - self._dataPS.  Returns early (with
		self.dataSplitter left at None) when no dataset provider is configured.

		Raises UserError if a dataset is attached to a task that previously had
		none, or if the splitter reports zero jobs.
		"""
		data_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
		self.dataSplitter = None
		self._data_refresh = -1
		def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
			# onChange callback of the 'dataset' option: flag a dataset resync and a config rewrite
			if (old_obj == '') and (cur_obj != ''):
				raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
			self._log.info('Dataset setup was changed - forcing resync...')
			config.setState(True, 'resync', detail = 'dataset')
			config.setState(True, 'init', detail = 'config') # This will trigger a write of the new options
			return cur_obj
		dataProvider = data_config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
			cls = DataProvider, requirePlugin = False, onChange = userRefresh)
		# Remember the resync request and reset the state flag afterwards
		self._forceRefresh = config.getState('resync', detail = 'dataset')
		config.setState(False, 'resync', detail = 'dataset')
		if not dataProvider:
			return

		# Set dataset specific option values in the 'storage' and 'parameters' sections
		tmp_config = data_config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
		tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
		tmp_config = data_config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
		tmp_config.set('default lookup', 'DATASETNICK')

		# The provider gets to check the configured splitter class
		splitterName = data_config.get('dataset splitter', 'FileBoundarySplitter')
		splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
		self.dataSplitter = splitterClass(data_config)

		# Create and register dataset parameter source
		partProcessor = data_config.getCompositePlugin('partition processor',
			'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
			'MultiPartitionProcessor', cls = PartitionProcessor, onChange = triggerResync(['parameters']))
		DataParameterSource = ParameterSource.getClass('DataParameterSource')
		self._dataPS = DataParameterSource(data_config.getWorkPath(), 'data',
			dataProvider, self.dataSplitter, partProcessor)
		DataParameterSource.datasetsAvailable['data'] = self._dataPS

		# Select dataset refresh rate
		self._data_refresh = data_config.getTime('dataset refresh', -1, onChange = None)
		if self._data_refresh > 0:
			# The resync interval is at least the query limit of the provider
			self._dataPS.resyncSetup(interval = max(self._data_refresh, dataProvider.queryLimit()))
			utils.vprint('Dataset source will be queried every %s' % strTime(self._data_refresh), -1)
		else:
			self._dataPS.resyncSetup(interval = 0)
		if self._forceRefresh:
			self._dataPS.resyncSetup(force = True)
		def externalRefresh(sig, frame):
			# SIGUSR2 handler: force a dataset resync
			self._dataPS.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, externalRefresh)

		if self.dataSplitter.getMaxJobs() == 0:
			raise UserError('There are no events to process')
Пример #28
0
	def setupJobParameters(self, config, pm):
		"""Configure dataset provider, splitter and the dataset parameter source.

		Sets self.dataSplitter, self.dataRefresh, self._forceRefresh and - if a
		dataset is configured - self._dataPS.  Returns early (with
		self.dataSplitter left at None) when no dataset provider is configured.
		The parameter pm is not used in this method.

		Raises UserError if a dataset is attached to a task that previously had
		none, or if the splitter reports zero jobs.
		"""
		# From here on "config" is the view that also includes the 'dataset' section
		config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
		self.dataSplitter = None
		self.dataRefresh = -1
		def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
			# onChange callback of the 'dataset' option: flag a dataset resync and a config rewrite
			if (old_obj == '') and (cur_obj != ''):
				raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
			self._log.info('Dataset setup was changed - forcing resync...')
			config.setState(True, 'resync', detail = 'dataset')
			config.setState(True, 'init', detail = 'config') # This will trigger a write of the new options
			return cur_obj
		dataProvider = config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
			cls = DataProvider, requirePlugin = False, onChange = userRefresh)
		# Remember the resync request and reset the state flag afterwards
		self._forceRefresh = config.getState('resync', detail = 'dataset')
		config.setState(False, 'resync', detail = 'dataset')
		if not dataProvider:
			return

		# Set dataset specific option values in the 'storage' and 'parameters' sections
		tmp_config = config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
		tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
		tmp_config = config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
		tmp_config.set('default lookup', 'DATASETNICK')

		# The provider gets to check the configured splitter class
		splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
		splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
		self.dataSplitter = splitterClass(config)

		# Create and register dataset parameter source
		partProcessor = config.getCompositePlugin('partition processor',
			'BasicPartitionProcessor LocationPartitionProcessor', 'MultiPartitionProcessor',
			cls = PartitionProcessor)
		DataParameterSource = ParameterSource.getClass('DataParameterSource')
		self._dataPS = DataParameterSource(config.getWorkPath(), 'data',
			dataProvider, self.dataSplitter, partProcessor)
		DataParameterSource.datasetsAvailable['data'] = self._dataPS

		# Select dataset refresh rate
		self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
		if self.dataRefresh > 0:
			# The resync interval is at least the query limit of the provider
			self._dataPS.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
			utils.vprint('Dataset source will be queried every %s' % strTime(self.dataRefresh), -1)
		else:
			self._dataPS.resyncSetup(interval = 0)
		if self._forceRefresh:
			self._dataPS.resyncSetup(force = True)
		def externalRefresh(sig, frame):
			# SIGUSR2 handler: force a dataset resync
			self._dataPS.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, externalRefresh)

		if self.dataSplitter.getMaxJobs() == 0:
			raise UserError('There are no events to process')
Пример #29
0
	def _getCMSSWPaths(self, config):
		"""Collect (variable name, path) pairs of local CMSSW software locations.

		Locations are only collected while the 'init' state for the sandbox is
		set: the user supplied 'cmssw dir' (if not empty) and the directory
		four levels above RELEASETOP from the SCRAM environment (if set).
		The collected locations are printed and returned as a list.
		"""
		locations = []
		if config.getState('init', detail = 'sandbox'):
			userDir = config.get('cmssw dir', '')
			if userDir != '':
				locations.append(('CMSSW_DIR_USER', userDir))
			if self.scramEnv.get('RELEASETOP', None):
				releaseTop = self.scramEnv['RELEASETOP']
				locations.append(('CMSSW_DIR_PRO', os.path.normpath('%s/../../../../' % releaseTop)))
		if not locations:
			return locations
		utils.vprint('Local jobs will try to use the CMSSW software located here:', -1)
		for idx, (_, path) in enumerate(locations):
			utils.vprint(' %i) %s' % (idx + 1, path), -1)
		return locations
Пример #30
0
	def _configureSCRAMSettings(self, config):
		"""Read the SCRAM project / project area options and set up the SCRAM environment.

		Either 'scram project' (exactly two entries: PROJECT VERSION) or
		'project area' may be given.  Sets self.projectArea, self.scramEnv,
		self.scramArch and self.scramVersion; for a project area also
		self.pattern with the list of area file patterns.

		Raises ConfigError if both options are given, if 'scram project' does
		not have two entries, if the project area does not exist, if one of
		its SCRAM Environment files cannot be parsed, if the project name or
		version is missing, or if the project is not CMSSW.
		"""
		def readEnvironment(fn):
			# Parse a SCRAM Environment file - the file handle is always closed again
			fp = open(fn, 'r')
			try:
				return utils.DictFormat().parse(fp, keyParser = {None: str})
			finally:
				fp.close()

		scramProject = config.getList('scram project', [])
		if len(scramProject):
			self.projectArea = config.getPath('project area', '')
			if len(self.projectArea):
				raise ConfigError('Cannot specify both SCRAM project and project area')
			if len(scramProject) != 2:
				raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
		else:
			self.projectArea = config.getPath('project area')

		if len(self.projectArea):
			self.pattern = config.getList('area files', ['-.*', '-config', 'bin', 'lib', 'python', 'module',
				'*/data', '*.xml', '*.sql', '*.db', '*.cf[if]', '*.py', '-*/.git', '-*/.svn', '-*/CVS', '-*/work.*'])

			if os.path.exists(self.projectArea):
				utils.vprint('Project area found in: %s' % self.projectArea, -1)
			else:
				raise ConfigError('Specified config area %r does not exist!' % self.projectArea)

			scramPath = os.path.join(self.projectArea, '.SCRAM')
			# try to open it
			try:
				self.scramEnv = readEnvironment(os.path.join(scramPath, 'Environment'))
			except Exception:
				raise ConfigError('Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)

			for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
				if key not in self.scramEnv:
					raise ConfigError('Installed program in project area not recognized.')

			# The default architecture is the first visible directory below .SCRAM
			# (noDefault is appended as fallback entry when there is no such directory)
			default_archs = lfilter(lambda x: os.path.isdir(os.path.join(scramPath, x)) and not x.startswith('.'), os.listdir(scramPath)) + [noDefault]
			default_arch = default_archs[0]
			self.scramArch = config.get('scram arch', default_arch)
			try:
				self.scramEnv.update(readEnvironment(os.path.join(scramPath, self.scramArch, 'Environment')))
			except Exception:
				raise ConfigError('Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch)
		else:
			self.scramEnv = {
				'SCRAM_PROJECTNAME': scramProject[0],
				'SCRAM_PROJECTVERSION': scramProject[1]
			}
			self.scramArch = config.get('scram arch')

		self.scramVersion = config.get('scram version', 'scramv1')
		if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')
Пример #31
0
 def display(self):
     """Print a table of all jobs which have left the INIT state.

     Each job contributes a row with job number, state name and WMS id.
     With verbosity > 0 the entries of the job history (in reversed item
     order) are added as extra rows, otherwise only the current
     destination of the job.  Entries equal to "N/A" are left out.
     """
     reports = []
     for jobNum in self._jobs:
         jobObj = self._jobDB.get(jobNum)
         if not jobObj or (jobObj.state == Job.INIT):
             continue
         reports.append({0: jobNum, 1: Job.enum2str(jobObj.state), 2: jobObj.wmsId})
         if utils.verbosity() > 0:
             # items() is a view without reverse() on Python 3 - copy into a list first
             history = list(jobObj.history.items())
             history.reverse()
             for at, dest in history:
                 if dest != "N/A":
                     reports.append({1: at, 2: " -> " + dest})
         elif jobObj.get("dest", "N/A") != "N/A":
             reports.append({2: " -> " + jobObj.get("dest")})
     # zip() is a one-shot iterator on Python 3 - hand over a real list as table header
     header = list(zip(range(3), ["Job", "Status / Attempt", "Id / Destination"]))
     utils.printTabular(header, reports, "rcl")
     utils.vprint()
Пример #32
0
	def getGCBlocks(self, usePhedex):
		"""Generate the block dictionaries for all blocks of the CMS datasets.

		usePhedex -- if true, the block locations are queried via
			getPhedexSEList in a parallel thread instead of being taken
			from the getCMSBlocks result

		Yields one dictionary per block with a non-empty file list.  The
		dictionary contains the DataProvider keys Dataset, BlockName,
		FileList and Locations (plus Metadata if lumi sections are selected).

		Raises DatasetError if a block is provided twice or if a dataset
		did not yield a single block.
		"""
		seenBlocks = set() # block paths seen so far (over all datasets)
		for datasetPath in self.getCMSDatasets():
			counter = 0
			for (blockPath, listSE) in self.getCMSBlocks(datasetPath, getSites = not usePhedex):
				if blockPath in seenBlocks:
					raise DatasetError('CMS source provided duplicate blocks! %s' % blockPath)
				seenBlocks.add(blockPath)
				result = {}
				result[DataProvider.Dataset] = blockPath.split('#')[0]
				result[DataProvider.BlockName] = blockPath.split('#')[1]

				if usePhedex: # Start parallel phedex query
					dictSE = {}
					tPhedex = utils.gcStartThread("Query phedex site info for %s" % blockPath, self.getPhedexSEList, blockPath, dictSE)

				if self.selectedLumis:
					result[DataProvider.Metadata] = ['Runs']
					if self.includeLumi:
						result[DataProvider.Metadata].append('Lumi')
				result[DataProvider.FileList] = list(self.getCMSFiles(blockPath))
				if self.checkUnique:
					uniqueURLs = set(map(lambda x: x[DataProvider.URL], result[DataProvider.FileList]))
					if len(result[DataProvider.FileList]) != len(uniqueURLs):
						# The message needs both the number of duplicates and the block name
						utils.vprint('Warning: The webservice returned %d duplicated files in dataset block %s! Continuing with unique files...' %
							(len(result[DataProvider.FileList]) - len(uniqueURLs), blockPath), -1)
					# Keep the first occurrence of each URL and preserve the file order
					uniqueFIs = []
					for fi in result[DataProvider.FileList]:
						if fi[DataProvider.URL] in uniqueURLs:
							uniqueURLs.remove(fi[DataProvider.URL])
							uniqueFIs.append(fi)
					result[DataProvider.FileList] = uniqueFIs

				if usePhedex:
					# Wait for the location query started above
					tPhedex.join()
					listSE = dictSE.get(blockPath)
				result[DataProvider.Locations] = listSE

				if len(result[DataProvider.FileList]):
					counter += 1
					yield result

			if (counter == 0) and self.selectedLumis:
				raise DatasetError('Dataset %s does not contain the requested run/lumi sections!' % datasetPath)
			elif counter == 0:
				raise DatasetError('Dataset %s does not contain any valid blocks!' % datasetPath)
Пример #33
0
 def _getCMSSWPaths(self, config):
     """Return the list of (variable name, path) pairs of local CMSSW locations.

     Entries are only collected while the 'init' state for the sandbox is
     set: the 'cmssw dir' option (if not empty) and the directory four
     levels above RELEASETOP of the SCRAM environment (if set).  A
     non-empty result is also printed as a numbered list.
     """
     found = []
     if config.getState('init', detail='sandbox'):
         userDir = config.get('cmssw dir', '')
         if userDir != '':
             found.append(('CMSSW_DIR_USER', userDir))
         if self.scramEnv.get('RELEASETOP', None):
             releaseTop = self.scramEnv['RELEASETOP']
             found.append(('CMSSW_DIR_PRO',
                           os.path.normpath('%s/../../../../' % releaseTop)))
     if not found:
         return found
     utils.vprint(
         'Local jobs will try to use the CMSSW software located here:', -1)
     for idx, (_, path) in enumerate(found):
         utils.vprint(' %i) %s' % (idx + 1, path), -1)
     return found
Пример #34
0
	def display(self):
		"""Print a table of all jobs which have left the INIT state.

		Each job contributes a row with job number, state name and WMS id.
		With verbosity > 0 the entries of the job history (in reversed item
		order) are added as extra rows, otherwise only the current
		destination of the job.  Entries equal to 'N/A' are left out.
		"""
		reports = []
		for jobNum in self._jobs:
			jobObj = self._jobDB.get(jobNum)
			if not jobObj or (jobObj.state == Job.INIT):
				continue
			reports.append({0: jobNum, 1: Job.states[jobObj.state], 2: jobObj.wmsId})
			if utils.verbosity() > 0:
				# items() is a view without reverse() on Python 3 - copy into a list first
				history = list(jobObj.history.items())
				history.reverse()
				for at, dest in history:
					if dest != 'N/A':
						reports.append({1: at, 2: ' -> ' + dest})
			elif jobObj.get('dest', 'N/A') != 'N/A':
				reports.append({2: ' -> ' + jobObj.get('dest')})
		# zip() is a one-shot iterator on Python 3 - hand over a real list as table header
		header = list(zip(range(3), ['Job', 'Status / Attempt', 'Id / Destination']))
		utils.printTabular(header, reports, 'rcl')
		utils.vprint()
Пример #35
0
 def displayWorkflow(self):
     """Show the report framed by empty lines, announce the run mode and run the job cycle.

     A negative workflow duration is announced as continuous mode, a
     positive one with its formatted length; a duration of zero prints
     no message.
     """
     utils.vprint(level=-1)
     self._report.display()
     utils.vprint(level=-1)
     duration = self._workflow.duration
     if duration > 0:
         utils.vprint('Running for %s' % strTimeShort(duration), -1)
     elif duration < 0:
         utils.vprint('Running in continuous mode. Press ^C to exit.', -1)
     self._workflow.jobCycle()
Пример #36
0
	def setupJobParameters(self, config, pm):
		"""Configure dataset provider, splitter and the dataset parameter source.

		Sets self.dataSplitter, self.dataRefresh, self._forceRefresh and
		self.dataset.  Returns early (with self.dataSplitter left at None)
		when the 'dataset' option is empty.  The parameter pm is not used in
		this method.

		Raises UserError if a dataset is attached to a task that previously had
		none, or if the splitter reports zero jobs.
		"""
		# From here on "config" is the tagged view that also includes the 'dataset' section
		config = config.changeView(viewClass = TaggedConfigView, addSections = ['dataset'], addTags = [self])
		self.dataSplitter = None
		self.dataRefresh = None
		self._forceRefresh = config.getState('resync', detail = 'dataset', default = False)
		def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
			# onChange callback of the 'dataset' option: request a forced resync
			if ((old_obj == '') and (cur_obj != '')):
				raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
			self._forceRefresh = True
			return cur_obj
		self.dataset = config.get('dataset', '', onChange = userRefresh).strip()
		if self.dataset == '':
			return
		# Set dataset specific option values
		config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
		config.set('default lookup', 'DATASETNICK')

		defaultProvider = config.get('dataset provider', 'ListProvider')
		dataProvider = DataProvider.create(config, self.dataset, defaultProvider)
		# The provider gets to check the configured splitter class
		splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
		splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
		self.dataSplitter = splitterClass(config)

		# Create and register dataset parameter source
		paramSplitProcessor = config.getCompositePlugin('dataset processor',
			'BasicDataSplitProcessor SECheckSplitProcessor', 'MultiDataSplitProcessor',
			cls = DataSplitProcessor).getInstance(config)
		paramSource = DataParameterSource(config.getWorkPath(), 'data',
			dataProvider, self.dataSplitter, paramSplitProcessor)
		DataParameterSource.datasetsAvailable['data'] = paramSource

		# Select dataset refresh rate
		self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
		if self.dataRefresh > 0:
			# The resync interval is at least the query limit of the provider
			paramSource.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
			utils.vprint('Dataset source will be queried every %s' % utils.strTime(self.dataRefresh), -1)
		else:
			paramSource.resyncSetup(interval = 0)
		if self._forceRefresh:
			paramSource.resyncSetup(force = True)
		def externalRefresh(sig, frame):
			# SIGUSR2 handler: force a dataset resync
			paramSource.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, externalRefresh)

		if self.dataSplitter.getMaxJobs() == 0:
			raise UserError('There are no events to process')
Пример #37
0
	def display(self):
		"""Print a table with one row per job category.

		Every row holds the description entries of the category plus the
		number of jobs in the WAITING, RUNNING, FAILED and SUCCESS columns.
		"""
		(statesByCat, descByCat, _) = CategoryReport._getCategoryStateSummary(self)
		# Job states without an entry in this map are counted as WAITING
		columnOfState = {Job.SUCCESS: 'SUCCESS', Job.FAILED: 'FAILED', Job.RUNNING: 'RUNNING', Job.DONE: 'RUNNING'}
		rows = []
		descKeys = set()
		for catKey in descByCat:
			row = dict(descByCat[catKey])
			descKeys.update(row.keys())
			catStates = statesByCat[catKey]
			for stateKey in catStates:
				column = columnOfState.get(stateKey, 'WAITING')
				row[column] = row.get(column, 0) + catStates[stateKey]
			rows.append(row)

		columns = ['WAITING', 'RUNNING', 'FAILED', 'SUCCESS']
		header = lmap(lambda x: (x, x), sorted(descKeys) + columns)
		# Job counts are shown as 7 character wide integers (0 for unparsable values)
		formatter = dict.fromkeys(columns, lambda x: '%7d' % parseStr(x, int, 0))
		utils.vprint(level = -1)
		utils.printTabular(header, rows, 'c' * len(descKeys), fmt = formatter)
		utils.vprint(level = -1)
Пример #38
0
	def getPhedexSEList(self, blockPath, dictSE):
		"""Store the list of replica locations of a block in dictSE[blockPath].

		The block replicas are read from the PhEDEx data service.  Replicas
		rejected by self.nodeFilter are ignored.  Depending on
		self.locationFormat the location is the 'se' entry, the 'node'
		entry or 'node/se'; replicas without usable location are skipped
		with a warning.
		"""
		locations = []
		dictSE[blockPath] = locations
		url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas'
		for phedexBlock in readJSON(url, {'block': blockPath})['phedex']['block']:
			for replica in phedexBlock['replica']:
				if not self.nodeFilter(replica['node'], replica['complete'] == 'y'):
					continue
				node = replica.get('node')
				se = replica.get('se')
				fmt = self.locationFormat
				if fmt == 'hostname':
					location = se
				elif fmt == 'sitedb':
					location = node
				elif fmt == 'both' and (node or se):
					location = '%s/%s' % (node, se)
				else:
					location = None
				if not location:
					utils.vprint('Warning: Dataset block %s replica at %s / %s is skipped!' %
						(blockPath, node, se) , -1)
					continue
				locations.append(location)
Пример #39
0
	def initPSpace(self):
		"""Return the list of (parameter number, lookup index) pairs with matching lookup entries.

		If the parameter source has no maximum number of parameters, a single
		lookup with parameter number None is performed.  A message is printed
		if not a single entry matched.
		"""
		entries = []

		def collect(pNum):
			# Look up the parameter info of pNum and record one entry per lookup hit
			info = {ParameterInfo.ACTIVE: True, ParameterInfo.REQS: []}
			self._psource.fillParameterInfo(pNum, info)
			matches = self._matcher.lookup(info)
			if not matches:
				return
			for (matchIdx, _match) in enumerate(matches):
				entries.append((pNum, matchIdx))

		if self._psource.getMaxParameters() is not None:
			for pNum in irange(self._psource.getMaxParameters()):
				collect(pNum)
		else:
			collect(None)
		if not entries:
			utils.vprint('Lookup parameter "%s" has no matching entries!' % self._key, -1)
		return entries
Пример #40
0
	def __init__(self, config, name, abort = None):
		"""Create the task, backend, monitoring, job manager and GUI of the workflow.

		config -- configuration object used to create all plugins
		name   -- plugin name, forwarded to NamedPlugin.__init__
		abort  -- if set to 'task', the setup stops right after the task
			module was created
		"""
		NamedPlugin.__init__(self, config, name)

		# Workdir settings
		self._workDir = config.getWorkPath()
		self._checkSpace = config.getInt('workdir space', 10, onChange = None)

		# Initialise task module
		self.task = config.getPlugin(['module', 'task'], cls = TaskModule, tags = [self])
		if abort == 'task':
			return
		utils.vprint('Current task ID: %s' % self.task.taskID, -1)
		utils.vprint('Task started on %s' % self.task.taskDate, -1)

		# Initialise workload management interface
		self.wms = config.getCompositePlugin('backend', 'grid', 'MultiWMS',
			cls = WMS, tags = [self, self.task])

		# Subsequent config calls also include section "jobs":
		jobs_config = config.changeView(viewClass = 'TaggedConfigView',
			addSections = ['jobs'], addTags = [self])

		# Initialise monitoring module
		self.monitor = jobs_config.getCompositePlugin('monitor', 'scripts', 'MultiMonitor',
			cls = Monitoring, tags = [self, self.task], pargs = (self.task,))

		# Initialise job database
		self.jobManager = jobs_config.getPlugin('job manager', 'SimpleJobManager',
			cls = JobManager, tags = [self, self.task, self.wms], pargs = (self.task, self.monitor))

		# Prepare work package
		self.wms.deployTask(self.task, self.monitor)

		# Configure workflow settings
		self._actionList = jobs_config.getList('action', ['check', 'retrieve', 'submit'], onChange = None)
		# The legacy option 'continuous' only changes the default of 'duration' to -1
		self.duration = 0
		if jobs_config.getBool('continuous', False, onChange = None): # legacy option
			self.duration = -1
		self.duration = jobs_config.getTime('duration', self.duration, onChange = None)
		self._submitFlag = jobs_config.getBool('submission', True, onChange = None)
		# The default of the submission time requirement is the wall time of the task
		self._submitTime = jobs_config.getTime('submission time requirement', self.task.wallTime, onChange = None)

		# Initialise GUI
		self._gui = jobs_config.getPlugin('gui', 'SimpleConsole', cls = GUI, onChange = None, pargs = (self,))
Пример #41
0
 def getJobConfig(self, jobNum):
     """Return the CMSSW job config extended by the variables of the dataset nickname.

     The "LUMI_RANGE" entry is replaced by the active lumi filter of the
     job.  With verbosity > 0 the nickname settings are printed.
     """
     jobCfg = CMSSW.getJobConfig(self, jobNum)
     nickVars = self.getVarsForNick(jobCfg.get("DATASETNICK"))
     jobCfg.update(nickVars)
     jobCfg["LUMI_RANGE"] = self.getActiveLumiFilter(jobCfg["LUMI_RANGE"], jobNum)
     if utils.verbosity() > 0:
         # Keys which already have a dedicated output line
         shownKeys = ["CMSSW_CONFIG", "LUMI_RANGE"]
         utils.vprint("Nickname: %s" % jobCfg.get("DATASETNICK"), 1)
         utils.vprint(" * Config files: %s" % jobCfg["CMSSW_CONFIG"], 1)
         utils.vprint(" *   Lumi range: %s" % jobCfg["LUMI_RANGE"], 1)
         otherVars = utils.filterDict(nickVars, lambda k: k not in shownKeys)
         utils.vprint(" *    Variables: %s" % otherVars, 1)
     return jobCfg
Пример #42
0
	def _getUserSource(self, pExpr, parent):
		"""Translate a parameter expression string into a single parameter source.

		The expression is tokenized and parsed into a tree, which is
		converted into a list of sources.  An unused dataset source is put
		in front and the parent source (if given) at the end of this list.
		The sources are combined with a cross product if more than one of
		them has a maximum number of parameters, otherwise they are zipped.
		Finally the result is wrapped by all sources of self.elevatedSwitch.
		"""
		tokenList = tokenize(pExpr, lchain([self.precedence.keys(), list('()[]<>')]))
		tokenList = list(tok2inlinetok(tokenList, list(self.precedence.keys())))
		utils.vprint('Parsing parameter string: "%s"' % str.join(' ', imap(str, tokenList)), 0)
		exprTree = tok2tree(tokenList, self.precedence)

		sources = self.tree2expr(exprTree)
		if DataParameterSource.datasetsAvailable and not DataParameterSource.datasetsUsed:
			sources.insert(0, DataParameterSource.create())
		if parent:
			sources.append(parent)
		finiteSources = lfilter(lambda p: p.getMaxParameters() is not None, sources)
		if len(finiteSources) > 1:
			combined = self.combineSources(CrossParameterSource, sources)
		else:
			combined = self.combineSources(ZipLongParameterSource, sources) # zip more efficient
		assert(len(combined) == 1)
		result = combined[0]
		for (PSourceClass, args) in self.elevatedSwitch:
			result = PSourceClass(result, *args)
		utils.vprint('Parsing output: %r' % result, 0)
		return result
Пример #43
0
    def initPSpace(self):
        """Build the list of (parameter number, lookup index) pairs with lookup matches.

        A parameter source without maximum number of parameters is queried
        once with parameter number None.  If nothing matched at all, a
        message is printed.
        """
        matched = []

        def collect(pNum):
            # Record one entry per lookup hit of the given parameter number
            info = {ParameterInfo.ACTIVE: True, ParameterInfo.REQS: []}
            self._psource.fillParameterInfo(pNum, info)
            hits = self._matcher.lookup(info)
            if not hits:
                return
            for (hitIdx, _hit) in enumerate(hits):
                matched.append((pNum, hitIdx))

        if self._psource.getMaxParameters() is not None:
            for pNum in irange(self._psource.getMaxParameters()):
                collect(pNum)
        else:
            collect(None)
        if not matched:
            utils.vprint(
                'Lookup parameter "%s" has no matching entries!' % self._key,
                -1)
        return matched
Пример #44
0
 def doTransfer(self, listDescSourceTarget):
     """Copy every (description, source, target) entry to all configured SE paths.

     Raises ConfigError if there is something to transfer but no SE path
     is set.  If a copy fails, the user is asked whether the file is
     available on the SE anyway - StorageError is raised if it is not.
     """
     for (desc, source, target) in listDescSourceTarget:
         if not self.smPaths:
             raise ConfigError(
                 "%s can't be transferred because '%s path wasn't set" %
                 (desc, self.smOptPrefix))
         for idx, sePath in enumerate(set(self.smPaths)):
             utils.vprint('Copy %s to SE %d ' % (desc, idx + 1), -1, newline=False)
             sys.stdout.flush()
             destination = os.path.join(sePath, target)
             proc = se_copy(source, destination, self.smForce)
             # Wait up to five minutes for the copy process
             if proc.status(timeout=5 * 60, terminate=True) == 0:
                 utils.vprint('finished', -1)
                 continue
             utils.vprint('failed', -1)
             utils.eprint(proc.stderr.read(timeout=0))
             utils.eprint('Unable to copy %s! You can try to copy it manually.' % desc)
             question = 'Is %s (%s) available on SE %s?' % (desc, source, sePath)
             if utils.getUserBool(question, False):
                 continue
             raise StorageError('%s is missing on SE %s!' % (desc, sePath))
Пример #45
0
 def display(self):
     """Print a table of all jobs which have left the INIT state.

     Each job contributes a row with job number, state name and WMS id.
     With verbosity > 0 the entries of the job history (in reversed item
     order) are added as extra rows, otherwise only the current
     destination of the job.  Entries equal to 'N/A' are left out.
     """
     reports = []
     for jobNum in self._jobs:
         jobObj = self._jobDB.get(jobNum)
         if not jobObj or (jobObj.state == Job.INIT):
             continue
         reports.append({
             0: jobNum,
             1: Job.enum2str(jobObj.state),
             2: jobObj.wmsId
         })
         if utils.verbosity() > 0:
             # items() is a view without reverse() on Python 3 - copy into a list first
             history = list(jobObj.history.items())
             history.reverse()
             for at, dest in history:
                 if dest != 'N/A':
                     reports.append({1: at, 2: ' -> ' + dest})
         elif jobObj.get('dest', 'N/A') != 'N/A':
             reports.append({2: ' -> ' + jobObj.get('dest')})
     utils.printTabular(
         lzip(irange(3), ['Job', 'Status / Attempt', 'Id / Destination']),
         reports, 'rcl')
     utils.vprint()
Пример #46
0
 def getPhedexSEList(self, blockPath, dictSE):
     """Store the list of replica locations of a block in dictSE[blockPath].

     The block replicas are read from the PhEDEx data service.  Replicas
     rejected by self.nodeFilter are ignored.  Depending on
     self._locationFormat the location is the 'se' entry, the 'node'
     entry or 'node/se'; replicas without usable location are skipped
     with a warning.
     """
     locations = []
     dictSE[blockPath] = locations
     url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas'
     blockList = readJSON(url, {'block': blockPath})['phedex']['block']
     for phedexBlock in blockList:
         for replica in phedexBlock['replica']:
             if not self.nodeFilter(replica['node'],
                                    replica['complete'] == 'y'):
                 continue
             node = replica.get('node')
             se = replica.get('se')
             if self._locationFormat == CMSLocationFormat.hostname:
                 location = se
             elif self._locationFormat == CMSLocationFormat.siteDB:
                 location = node
             elif (self._locationFormat == CMSLocationFormat.both) and (
                     node or se):
                 location = '%s/%s' % (node, se)
             else:
                 location = None
             if not location:
                 utils.vprint(
                     'Warning: Dataset block %s replica at %s / %s is skipped!'
                     % (blockPath, node, se), -1)
                 continue
             locations.append(location)
Пример #47
0
	def _CleanSocket(self):
		if not os.path.exists(self.sshLink):
			vprint("No Socket %s" % self.sshLink)
			return True
		vprint("Killing Socket %s" % self.sshLink)
#		killSocket = LoggedProcess( " ".join([self.cmd, self.defaultArgs, self.socketArgsDef, "-O exit", self.remoteHost]) )
#		while killSocket.poll() == -1:
#			print "poll", killSocket.poll()
#			time.sleep(0.5)
#			timeout += 0.5
#			if timeout == 5:
#				vprint("Failed to cancel ssh Socket...\n%s" % self.sshLink, level=1)
#				return False
#		print "done", killSocket.poll()
		timeout = 0
		while os.path.exists(self.sshLink):
			vprint("exists %d" % timeout)
			time.sleep(0.5)
			timeout += 0.5
			#if timeout == 5:
			#	vprint("Failed to remove ssh Socket...\n%s" % self.sshLink, level=1)
			#	return False
		return True
Пример #48
0
 def submitJobs(self, jobNumList, task):  # jobNumList = [1, 2, ...]
     """Report that the submission of the given jobs is discarded by this inactive WMS."""
     message = 'Inactive WMS (%s): Discarded submission of %d jobs' % (
         self.wmsName, len(jobNumList))
     utils.vprint(message, -1)
Пример #49
0
    def display(self):
        """Print the report summary: job totals and the number of jobs per state.

        Percentages are relative to the size of the job database.  An
        empty job database is reported with 0% everywhere instead of
        failing with a division by zero.

        Returns 0.
        """
        njobs_total = len(self._jobDB)
        # Divisor for all percentages - at least 1 to survive an empty job database
        njobs_norm = max(1, njobs_total)

        # Number of jobs per state - floats, so the divisions below are true divisions
        summary = lmap(lambda x: 0.0, Job.enumNames)
        defaultJob = Job()
        for jobNum in self._jobs:
            summary[self._jobDB.get(jobNum, defaultJob).state] += 1

        def makeSum(*states):
            # Number of jobs in any of the given states
            return sum(imap(lambda z: summary[z], states))

        def makePer(*states):
            # [number of jobs, rounded percentage] for the given states
            count = makeSum(*states)
            return [count, round(count / njobs_norm * 100.0)]

        # Print report summary
        self._printHeader('REPORT SUMMARY:')
        jobov_succ = makePer(Job.SUCCESS)
        utils.vprint(
            'Total number of jobs:%9d     Successful jobs:%8d  %3d%%' %
            tuple([njobs_total] + jobov_succ), -1)
        njobs_assigned = makeSum(Job.SUBMITTED, Job.WAITING, Job.READY,
                                 Job.QUEUED, Job.RUNNING)
        jobov_fail = makePer(Job.ABORTED, Job.CANCELLED, Job.FAILED)
        utils.vprint(
            'Jobs assigned to WMS:%9d        Failing jobs:%8d  %3d%%' %
            tuple([njobs_assigned] + jobov_fail), -1)
        utils.vprint(' ' * 65 + '\nDetailed Status Information:      ',
                     -1,
                     newline=False)
        # Jobs of the database which are not part of the selected jobs
        ignored = njobs_total - sum(summary)
        if ignored:
            utils.vprint(
                '(Jobs    IGNORED:%8d  %3d%%)' %
                (ignored, ignored / njobs_norm * 100.0), -1)
        else:
            utils.vprint(' ' * 31, -1)
        # Two states per output line: the newline is only written after odd state numbers
        for stateNum, category in enumerate(Job.enumNames):
            utils.vprint('Jobs  %9s:%8d  %3d%%     ' %
                         tuple([category] + makePer(stateNum)),
                         -1,
                         newline=stateNum % 2)
        utils.vprint('-' * 65, -1)
        return 0
Пример #50
0
 def _printHeader(self, message, level=-1):
     """Print a 65 character wide header: rule, message with right-aligned self._header, short rule."""
     rule = '-' * 65
     title = message + self._header.rjust(65 - len(message))
     shortRule = ('-' * 15).ljust(65)
     for line in (rule, title, shortRule):
         utils.vprint(line, level)
Пример #51
0
    def __init__(self, config, name, abort=None):
        """Create the task, backend, monitoring, job manager and GUI of the workflow.

        config -- configuration object used to create all plugins
        name   -- plugin name, forwarded to NamedPlugin.__init__
        abort  -- if set to 'task', the setup stops right after the task
                  module was created
        """
        NamedPlugin.__init__(self, config, name)

        # Workdir settings
        self._workDir = config.getWorkPath()
        self._checkSpace = config.getInt('workdir space', 10, onChange=None)

        # Initialise task module
        self.task = config.getPlugin(['module', 'task'],
                                     cls=TaskModule,
                                     tags=[self])
        if abort == 'task':
            return
        utils.vprint('Current task ID: %s' % self.task.taskID, -1)
        utils.vprint('Task started on %s' % self.task.taskDate, -1)

        # Initialise workload management interface
        self.wms = config.getCompositePlugin('backend',
                                             'grid',
                                             'MultiWMS',
                                             cls=WMS,
                                             tags=[self, self.task])

        # Subsequent config calls also include section "jobs":
        jobs_config = config.changeView(viewClass='TaggedConfigView',
                                        addSections=['jobs'],
                                        addTags=[self])

        # Initialise monitoring module
        self.monitor = jobs_config.getCompositePlugin('monitor',
                                                      'scripts',
                                                      'MultiMonitor',
                                                      cls=Monitoring,
                                                      tags=[self, self.task],
                                                      pargs=(self.task, ))

        # Initialise job database
        self.jobManager = jobs_config.getPlugin(
            'job manager',
            'SimpleJobManager',
            cls=JobManager,
            tags=[self, self.task, self.wms],
            pargs=(self.task, self.monitor))

        # Prepare work package
        self.wms.deployTask(self.task, self.monitor)

        # Configure workflow settings
        self._actionList = jobs_config.getList('action',
                                               ['check', 'retrieve', 'submit'],
                                               onChange=None)
        # The legacy option 'continuous' only changes the default of 'duration' to -1
        self.duration = 0
        if jobs_config.getBool('continuous', False,
                               onChange=None):  # legacy option
            self.duration = -1
        self.duration = jobs_config.getTime('duration',
                                            self.duration,
                                            onChange=None)
        self._submitFlag = jobs_config.getBool('submission',
                                               True,
                                               onChange=None)
        # The default of the submission time requirement is the wall time of the task
        self._submitTime = jobs_config.getTime('submission time requirement',
                                               self.task.wallTime,
                                               onChange=None)

        # Initialise GUI
        self._gui = jobs_config.getPlugin('gui',
                                          'SimpleConsole',
                                          cls=GUI,
                                          onChange=None,
                                          pargs=(self, ))
Пример #52
0
 def checkJobs(self, ids):
     """Report that a status check was discarded by this inactive WMS.

     ``ids`` is a list of (WMS id, job number) pairs, for example
     [(WMS-61226, 1), (WMS-61227, 2), ...]; only its length is used.
     """
     message = 'Inactive WMS (%s): Discarded check of %d jobs' % (
         self.wmsName, len(ids))
     utils.vprint(message, -1)
Пример #53
0
 def cancelJobs(self, ids):
     """Report that a cancel request was discarded by this inactive WMS.

     Only the number of entries in ``ids`` is used, for the message.
     """
     message = 'Inactive WMS (%s): Discarded abort of %d jobs' % (
         self.wmsName, len(ids))
     utils.vprint(message, -1)
Пример #54
0
    def checkJobs(self, wmsJobIdList):
        """Yield the current status of the requested jobs.

        The status executable is run once for all requested jobs and one
        ``(jobID, wmsID, status, jobinfo)`` tuple is yielded for every
        requested job found in its output.  If that query exits with return
        code 0, every requested job missing from the output is yielded as
        ``Job.DONE`` with an empty job info dictionary.

        Args:
            wmsJobIdList: sequence of ``(wmsID, jobID)`` pairs to check.

        Yields:
            Tuples ``(jobID, wmsID, status, jobinfo)``.

        Raises:
            BackendError: if a line of the status output cannot be processed.
        """
        if not wmsJobIdList:
            # End the generator with a plain return: an explicit
            # "raise StopIteration" inside a generator body is converted into
            # a RuntimeError by PEP 479 (always on since Python 3.7).
            return
        self.debugOut('Started checking: %s' % set(lzip(*wmsJobIdList)[0]))
        self.debugPool()

        # raw IDs still waiting for a status; entries are removed once reported
        wmsIdList = list(self._getRawIDs(wmsJobIdList))
        wmsIdArgument = ' '.join(wmsIdList)
        wmsToJobMap = dict(wmsJobIdList)

        activity = utils.ActivityLog('fetching job status')
        statusProcess = self.Pool.LoggedExecute(
            self.statusExec, '%(format)s %(jobIDs)s' % {
                "jobIDs": wmsIdArgument,
                "format": self.statusReturnFormat
            })
        activity.finish()

        activity = utils.ActivityLog('checking job status')
        # process all lines of the status executable output
        utils.vprint('querrying condor_q', 2)
        for statusReturnLine in statusProcess.iter():
            try:
                # test if wmsID job was requested, then extract data and remove from check list
                if statusReturnLine.split()[0] in wmsIdList:
                    (jobID, wmsID, status,
                     jobinfo) = self._statusReturnLineRead(statusReturnLine)
                    wmsIdList.remove(wmsID)
                    yield (jobID, self._createId(wmsID), status, jobinfo)
            except Exception:
                raise BackendError('Error reading job status info:\n%s' %
                                   statusReturnLine)

        # cleanup after final yield
        retCode = statusProcess.wait()
        if retCode != 0:
            # only log the raw error if explainError did not handle it
            if self.explainError(statusProcess, retCode):
                pass
            else:
                statusProcess.logError(self.errorLog, brief=True)
        activity.finish()

        self.debugOut("Remaining after condor_q: %s" % wmsIdList)
        # jobs not in queue have either succeeded or failed - both is considered 'Done' for GC
        # if no additional information is required, consider everything we couldn't find as done
        if retCode == 0:
            # iterate over a copy because the list is modified inside the loop
            for wmsID in list(wmsIdList):
                wmsIdList.remove(wmsID)
                wmsID = self._createId(wmsID)
                yield (wmsToJobMap[wmsID], wmsID, Job.DONE, {})
        # TODO: query log on properly configured pool
        # querying the history can be SLOW! only do when necessary and possible
        # NOTE: this branch is deliberately disabled by the leading "False"
        if False and len(wmsIdList) > 0 and self.remoteType != PoolType.SPOOL:
            utils.vprint('querrying condor_history', 2)
            # querying the history can be VERY slow! Only do so bit by bit if possible
            if self.historyFile:
                historyList = sorted([
                    "-f " + path for path in ifilter(
                        os.path.isfile, glob.glob(self.historyFile + "*"))
                ])
            else:
                historyList = [""]
            # query the history file by file until no more jobs need updating
            for historyFile in historyList:
                if len(wmsIdList) > 0:
                    statusArgs = '%(fileQuery)s %(format)s %(jobIDs)s' % {
                        "fileQuery": historyFile,
                        "jobIDs": " ",
                        "format": self.statusReturnFormat
                    }
                    statusProcess = self.Pool.LoggedExecute(
                        self.historyExec, statusArgs)
                    for statusReturnLine in statusProcess.iter():
                        # test if line starts with a number and was requested
                        try:
                            # test if wmsID job was requested, then extract data and remove from check list
                            if statusReturnLine.split()[0] in wmsIdList:
                                (jobID, wmsID, status,
                                 jobinfo) = self._statusReturnLineRead(
                                     statusReturnLine)
                                wmsIdList.remove(wmsID)
                                yield (jobID, self._createId(wmsID), status,
                                       jobinfo)
                        except Exception:
                            raise BackendError(
                                'Error reading job status info:\n%s' %
                                statusReturnLine)

                    # cleanup after final yield
                    retCode = statusProcess.wait()
                    if retCode != 0:
                        if self.explainError(statusProcess, retCode):
                            pass
                        else:
                            statusProcess.logError(self.errorLog, brief=True)
        self.debugFlush()
Пример #55
0
    def __init__(self, config, wmsName):
        utils.vprint('Using batch system: Condor/GlideInWMS', -1)
        BasicWMS.__init__(self, config, wmsName)
        # special debug out/messages/annotations - may have noticeable effect on storage and performance!
        debugLogFN = config.get('debugLog', '')
        self.debug = False
        if debugLogFN:
            self.debug = open(debugLogFN, 'a')
        ######
        self.taskID = config.get('task id',
                                 md5(str(time.time())).hexdigest(),
                                 persistent=True)  # FIXME!
        self.debugOut("""

		#############################
		Initialized Condor/GlideInWMS
		#############################
		Config: %s
		taskID: %s
		Name:   %s
		#############################

		""" % (config.getConfigName(), self.taskID, wmsName))
        # finalize config state by reading values or setting to defaults
        self.settings = {
            'jdl': {
                'Universe': config.get('Universe', 'vanilla'),
                'NotifyEmail': config.get('NotifyEmail', ''),
                'ClassAdData': config.getList('ClassAdData', []),
                'JDLData': config.getList('JDLData', [])
            },
            'pool': {
                'hosts': config.getList('PoolHostList', [])
            }
        }
        # prepare interfaces for local/remote/ssh pool access
        self._initPoolInterfaces(config)
        # load keys for condor pool ClassAds
        self.poolReqs = config.getDict('poolArgs req', {})[0]
        self.poolQuery = config.getDict('poolArgs query', {})[0]
        self._formatStatusReturnQuery(config)
        # Sandbox base path where individual job data is stored, staged and returned to
        self.sandPath = config.getPath('sandbox path',
                                       config.getWorkPath('sandbox'),
                                       mustExist=False)
        # history query is faster with split files - check if and how this is used
        # default condor_history command works WITHOUT explicitly specified file
        self.historyFile = None
        if self.remoteType == PoolType.LOCAL and getoutput(
                self.configValExec +
                ' ENABLE_HISTORY_ROTATION').lower() == 'true':
            self.historyFile = getoutput(self.configValExec + ' HISTORY')
            if not os.path.isfile(self.historyFile):
                self.historyFile = None
        # broker for selecting Sites
        self.brokerSite = config.getPlugin('site broker',
                                           'UserBroker',
                                           cls=Broker,
                                           tags=[self],
                                           pargs=('sites', 'sites',
                                                  self.getSites))
        self.debugFlush()
Пример #56
0
 def retrieveJobs(self, ids):
     """Report that an output retrieval was discarded by this inactive WMS.

     Only the number of entries in ``ids`` is used, for the message.
     """
     message = 'Inactive WMS (%s): Discarded retrieval of %d jobs' % (
         self.wmsName, len(ids))
     utils.vprint(message, -1)