def deployTask(self, task, monitor):
    """Register the task's files with the storage managers and pack the sandbox.

    The third element of every entry returned by ``_getSandboxFilesOut`` and
    ``task.getSEInFiles`` is recorded, ``doTransfer`` is invoked when the
    'init' state with detail 'storage' is set, and the sandbox tarball is
    generated when it does not exist yet or the 'init' state with detail
    'sandbox' is set.
    """
    def pickLast(entry):
        # Entries are indexed as triples; only the third element is used
        return entry[2]

    def toTarEntries(sources):
        # Plain path strings and objects carrying a ``name`` attribute are
        # both mapped onto the triples handed to utils.genTarball
        for src in sources:
            if not isinstance(src, str):
                yield (None, os.path.basename(src.name), src)
            else:
                yield (src, os.path.basename(src), False)

    self.outputFiles = lmap(pickLast, self._getSandboxFilesOut(task))  # HACK
    task.validateVariables()

    # add task SE files to SM
    seInputNames = lmap(pickLast, task.getSEInFiles())
    self.smSEIn.addFiles(seInputNames)
    # Transfer common SE files
    if self.config.getState('init', detail='storage'):
        self.smSEIn.doTransfer(task.getSEInFiles())

    # Package sandbox tar file
    self._log.log(logging.INFO1, 'Packing sandbox')
    tarPath = self._getSandboxName(task)
    utils.ensureDirExists(os.path.dirname(tarPath), 'sandbox directory')
    # The state is only queried when the tarball already exists
    rebuild = not os.path.exists(tarPath)
    if not rebuild:
        rebuild = self.config.getState('init', detail='sandbox')
    if rebuild:
        managers = [self.smSEIn, self.smSEOut]
        utils.genTarball(tarPath, toTarEntries(self._getSandboxFiles(task, monitor, managers)))
def _readJobs(self, jobLimit):
    """Load the stored job objects from the job database directory.

    Files named ``job_<number>.txt`` in ``self._dbPath`` are processed in
    ascending job-number order.  A non-negative ``jobLimit`` stops reading at
    the first job number that is greater than or equal to the limit; a
    negative ``jobLimit`` disables the limit.

    Returns a dict mapping each job number to the object returned by
    ``self._load_job`` for the corresponding file.
    """
    utils.ensureDirExists(self._dbPath, 'job database directory', JobError)

    candidates = []
    for jobFile in fnmatch.filter(os.listdir(self._dbPath), 'job_*.txt'):
        try:  # 2xsplit is faster than regex
            jobNum = int(jobFile.split(".")[0].split("_")[1])
        except (IndexError, ValueError):
            # The name matches the glob but carries no usable job number
            continue
        candidates.append((jobNum, jobFile))

    (jobMap, maxJobs) = ({}, len(candidates))
    activity = Activity('Reading job infos')
    try:
        idx = 0
        for (jobNum, jobFile) in sorted(candidates):
            idx += 1
            if (jobLimit >= 0) and (jobNum >= jobLimit):
                self._log.info('Stopped reading job infos at job #%d out of %d available job files, since the limit of %d jobs is reached',
                    jobNum, maxJobs, jobLimit)
                break
            jobMap[jobNum] = self._load_job(os.path.join(self._dbPath, jobFile))
            # Only refresh the progress message every 100 files
            if idx % 100 == 0:
                activity.update('Reading job infos %d [%d%%]' % (idx, (100.0 * idx) / maxJobs))
    finally:
        # Finish the activity even if loading a job file raised
        activity.finish()
    return jobMap
def __init__(self, config, wmsName):
    """Prepare work-directory paths, the run library and the storage plugins.

    ``config`` supplies the work paths and plugin instances; ``wmsName`` is
    forwarded to ``WMS.__init__``.  The file 'gc-run.lib' in the work
    directory is created from the shared template when it does not exist.
    """
    WMS.__init__(self, config, wmsName)
    if self.wmsName != self.__class__.__name__.upper():
        utils.vprint('Using batch system: %s (%s)' % (self.__class__.__name__, self.wmsName), -1)
    else:
        utils.vprint('Using batch system: %s' % self.wmsName, -1)

    self.errorLog = config.getWorkPath('error.tar')
    self._runlib = config.getWorkPath('gc-run.lib')
    if not os.path.exists(self._runlib):
        # Render the template BEFORE the target is opened for writing: if
        # reading fails, no empty 'gc-run.lib' is left behind that would
        # pass the existence check above on the next start
        template = SafeFile(utils.pathShare('gc-run.lib')).read()
        content = template.replace('__GC_VERSION__', __import__('grid_control').__version__)
        fp = SafeFile(self._runlib, 'w')
        try:
            fp.write(content)
        finally:
            fp.close()
    self._outputPath = config.getWorkPath('output')
    utils.ensureDirExists(self._outputPath, 'output directory')
    self._failPath = config.getWorkPath('fail')

    # Initialise access token, broker and storage manager
    self._token = config.getCompositePlugin(['proxy', 'access token'], 'TrivialAccessToken',
        'MultiAccessToken', cls = AccessToken, inherit = True, tags = [self])

    # UI -> SE -> WN
    self.smSEIn = config.getPlugin('se input manager', 'SEStorageManager',
        cls = StorageManager, tags = [self], pargs = ('se', 'se input', 'SE_INPUT'))
    self.smSBIn = config.getPlugin('sb input manager', 'LocalSBStorageManager',
        cls = StorageManager, tags = [self], pargs = ('sandbox', 'sandbox', 'SB_INPUT'))
    # UI <- SE <- WN
    self.smSEOut = config.getPlugin('se output manager', 'SEStorageManager',
        cls = StorageManager, tags = [self], pargs = ('se', 'se output', 'SE_OUTPUT'))
    self.smSBOut = None

    self.fileNamesEnvironment = config.getBool("file names environment", True, onChange = None)
def _initSockets(self, **kwargs):
    """Initialise the socket bookkeeping attributes from keyword options.

    Options read (with defaults): ``needSocket`` (True), ``socketMinSec``
    (300), ``socketCount`` (2, never less than 2), ``socketMaxMiss`` (2) and
    ``socketDir``.  A non-empty ``socketDir`` shorter than 105 characters is
    used as given; otherwise a fresh temporary directory is created.
    """
    self._needSocket = kwargs.get("needSocket", True)
    self._socketMinSec = kwargs.get("socketMinSec", 300)
    requestedCount = kwargs.get("socketCount", 2)
    self._socketCount = max(2, requestedCount)
    self._socketIndex = 0
    self._socketMaxMiss = kwargs.get("socketMaxMiss", 2)
    self._socketMisses = 0

    # sockets should reside in secure, managed directory
    # NOTE(review): the 105 character bound presumably reflects the length
    # limit of socket paths - confirm
    requestedDir = kwargs.get("socketDir", "")
    if requestedDir and len(requestedDir) < 105:
        self._socketDir = requestedDir
        ensureDirExists(self._socketDir, name="SSH connection socket container directory")
    else:
        self._socketDir = tempfile.mkdtemp()
    self._log(logging.DEBUG1, 'Using socket directoy %s' % self._socketDir)

    # create list of socket names and corresponding arguments to rotate through
    socketPaths = []
    socketArgs = []
    for socketIndex in irange(self._socketCount):
        socketPath = os.path.join(self._socketDir, str(socketIndex))
        socketPaths.append(socketPath)
        socketArgs.append(["-oControlMaster=auto", "-oControlPath=%s" % socketPath])
    self._socketList = socketPaths
    self._socketArgList = socketArgs
    self._socketProcs = {}
def freezeConfig(self, writeConfig=True):
    """Mark the current container read-only, report unused options, store config.

    The two config files are written when ``writeConfig`` is true or when the
    file at ``self._oldCfgPath`` does not exist yet.
    """
    self._curContainer.setReadOnly()

    # Inform the user about unused options
    def isUnused(entry):
        return not (('!' in entry.section) or entry.accessed)

    unusedEntries = lfilter(isUnused, self._view.iterContent())
    freezeLog = logging.getLogger('config.freeze')
    if unusedEntries:
        freezeLog.log(logging.INFO1, 'There are %s unused config options!', len(unusedEntries))
    for unusedEntry in unusedEntries:
        freezeLog.log(logging.INFO1, '\t%s', unusedEntry.format(printSection=True))

    # Nothing to write when not requested and the stored config already exists
    if not writeConfig and os.path.exists(self._oldCfgPath):
        return

    ensureDirExists(os.path.dirname(self._oldCfgPath), 'config storage directory', ConfigError)
    # Write user friendly, flat config file and config file with saved settings
    self._write_file(self._flatCfgPath,
        printDefault=False, printUnused=False, printMinimal=True, printWorkdir=True)
    header = '; ==> DO NOT EDIT THIS FILE! <==\n; This file is used to find config changes!\n'
    self._write_file(self._oldCfgPath,
        printDefault=True, printUnused=True, printMinimal=True, printSource=True,
        message=header)
def _readJobs(self, jobLimit):
    """Load the stored job objects from the job database directory.

    Files named ``job_<number>.txt`` in ``self._dbPath`` are processed in
    ascending job-number order.  A non-negative ``jobLimit`` stops reading at
    the first job number that is greater than or equal to the limit; a
    negative ``jobLimit`` disables the limit.

    Returns a dict mapping each job number to the object returned by
    ``self._load_job`` for the corresponding file.
    """
    utils.ensureDirExists(self._dbPath, 'job database directory', JobError)

    candidates = []
    for jobFile in fnmatch.filter(os.listdir(self._dbPath), 'job_*.txt'):
        try:  # 2xsplit is faster than regex
            jobNum = int(jobFile.split(".")[0].split("_")[1])
        except (IndexError, ValueError):
            # The name matches the glob but carries no usable job number
            continue
        candidates.append((jobNum, jobFile))

    (jobMap, maxJobs) = ({}, len(candidates))
    activity = Activity('Reading job infos')
    try:
        idx = 0
        for (jobNum, jobFile) in sorted(candidates):
            idx += 1
            if (jobLimit >= 0) and (jobNum >= jobLimit):
                self._log.info(
                    'Stopped reading job infos at job #%d out of %d available job files, since the limit of %d jobs is reached',
                    jobNum, maxJobs, jobLimit)
                break
            jobMap[jobNum] = self._load_job(os.path.join(self._dbPath, jobFile))
            # Only refresh the progress message every 100 files
            if idx % 100 == 0:
                activity.update('Reading job infos %d [%d%%]' % (idx, (100.0 * idx) / maxJobs))
    finally:
        # Finish the activity even if loading a job file raised
        activity.finish()
    return jobMap
def retrieveJobs(self, gcID_jobNum_List):  # Process output sandboxes returned by getJobsOutput
    """Yield ``(jobNum, exitCode, data, path)`` for every retrieved job output.

    The pairs produced by ``self._getJobsOutput`` are handled as follows:
      * ``(jobNum, None)``  - job could not be retrieved; ``(jobNum, -1, {},
        None)`` is yielded unless the job was already retrieved successfully.
      * ``(None, path)``    - leftovers of a job retrieval; skipped.
      * ``(jobNum, path)``  - the job info is parsed and the directory moved
        below ``self._outputPath``; unparsable outputs are moved below
        ``self._failPath`` and reported with exit code -1.

    Raises BackendError when the parsed job number differs from the expected one.
    """
    # Function to force moving a directory
    def forceMove(source, target):
        # EnvironmentError is the common base of IOError, OSError and
        # shutil.Error on Python 2 and an alias of OSError on Python 3, so
        # failures of rmtree/move are reported instead of propagating
        try:
            if os.path.exists(target):
                shutil.rmtree(target)
        except EnvironmentError:
            self._log.exception('%r cannot be removed', target)
            return False
        try:
            shutil.move(source, target)
        except EnvironmentError:
            self._log.exception('Error moving job output directory from %r to %r', source, target)
            return False
        return True

    retrievedJobs = []

    for inJobNum, pathName in self._getJobsOutput(gcID_jobNum_List):
        # inJobNum != None, pathName == None => Job could not be retrieved
        if pathName is None:
            if inJobNum not in retrievedJobs:
                yield (inJobNum, -1, {}, None)
            continue

        # inJobNum == None, pathName != None => Found leftovers of job retrieval
        if inJobNum is None:
            continue

        # inJobNum != None, pathName != None => Job retrieval from WMS was ok
        jobFile = os.path.join(pathName, 'job.info')
        try:
            job_info = self._job_parser.process(pathName)
        except Exception:
            self._log.exception('Unable to parse job.info')
            job_info = None
        if job_info:
            jobNum = job_info[JobResult.JOBNUM]
            if jobNum != inJobNum:
                raise BackendError('Invalid job id in job file %s' % jobFile)
            if forceMove(pathName, os.path.join(self._outputPath, 'job_%d' % jobNum)):
                retrievedJobs.append(inJobNum)
                yield (jobNum, job_info[JobResult.EXITCODE], job_info[JobResult.RAW], pathName)
            else:
                yield (jobNum, -1, {}, None)
            continue

        # Clean empty pathNames
        for subDir in imap(lambda x: x[0], os.walk(pathName, topdown=False)):
            try:
                os.rmdir(subDir)
            except Exception:
                # Best effort: directories that cannot be removed are kept
                clear_current_exception()

        if os.path.exists(pathName):
            # Preserve failed job
            utils.ensureDirExists(self._failPath, 'failed output directory')
            forceMove(pathName, os.path.join(self._failPath, os.path.basename(pathName)))

        yield (inJobNum, -1, {}, None)
def main():
    """Create the config, set up logging and run or manage the workflow.

    Uses the names ``args``, ``parser`` and ``opts`` from the enclosing
    scope.  Exits with ``os.EX_OK`` after printing config help or after a
    delete / reset request; otherwise the workflow is run.
    """
    factory = createConfigFactory(configFile = args[0], additional = [OptsConfigFiller(parser)])
    cfg = factory.getConfig()
    logging_setup(cfg.changeView(setSections = ['logging']))

    # Check work dir validity (default work directory is the config file name)
    if not os.path.exists(cfg.getWorkPath()):
        if not cfg.getState('init'):
            utils.vprint('Will force initialization of %s if continued!' % cfg.getWorkPath(), -1)
            cfg.setState(True, 'init')
        question = 'Do you want to create the working directory %s?' % cfg.getWorkPath()
        if cfg.getChoiceYesNo('workdir create', True, interactive = question):
            utils.ensureDirExists(cfg.getWorkPath(), 'work directory')

    # Create workflow and freeze config settings
    globalCfg = cfg.changeView(setSections = ['global'])
    workflowPlugin = globalCfg.getPlugin('workflow', 'Workflow:global', cls = Workflow)
    workflow = workflowPlugin.getInstance()
    writeConfig = cfg.getState('init', detail = 'config')
    factory.freezeConfig(writeConfig = writeConfig)

    # Give config help
    if opts.help_cfg or opts.help_scfg:
        cfg.write(sys.stdout, printDefault = opts.help_cfg, printUnused = False,
            printMinimal = opts.help_scfg, printSource = opts.help_cfg)
        sys.exit(os.EX_OK)

    # Check if user requested deletion / reset of jobs
    if opts.delete:
        workflow.jobManager.delete(workflow.wms, opts.delete)
        sys.exit(os.EX_OK)
    if opts.reset:
        workflow.jobManager.reset(workflow.wms, opts.reset)
        sys.exit(os.EX_OK)

    # Run the configured workflow
    workflow.run()
def gc_create_workflow(config):
    """Install signal handlers, prepare the work directory and build the workflow.

    Returns the workflow plugin created from the 'global' config view.  The
    process exits with ``os.EX_OK`` instead when config help was displayed or
    a delete / reset action was requested.
    """
    # set up signal handler for interrupts and debug session requests
    signal.signal(signal.SIGURG, handle_debug_interrupt)
    signal.signal(signal.SIGINT, handle_abort_interrupt)

    # Configure logging settings
    logging_setup(config.changeView(setSections=['logging']))

    globalView = config.changeView(setSections=['global'])
    # Check work dir validity (default work directory is the config file name)
    if not os.path.exists(globalView.getWorkPath()):
        if not globalView.getState('init'):
            logging.getLogger('user').warning('Starting initialization of %s!', globalView.getWorkPath())
            globalView.setState(True, 'init')
        question = 'Do you want to create the working directory %s?' % globalView.getWorkPath()
        if globalView.getChoiceYesNo('workdir create', True, interactive_msg=question):
            utils.ensureDirExists(globalView.getWorkPath(), 'work directory')
    for pluginPath in globalView.getPaths('package paths', []):
        init_hpf_plugins(pluginPath)

    # Query config settings before config is frozen
    showConfig = globalView.getState('display', detail='config')
    showMinimalConfig = globalView.getState('display', detail='minimal config')

    actionView = config.changeView(setSections=['action'])
    deleteSelector = actionView.get('delete', '', onChange=None)
    resetSelector = actionView.get('reset', '', onChange=None)

    # Create workflow and freeze config settings
    workflow = globalView.getPlugin('workflow', 'Workflow:global', cls='Workflow')
    writeConfig = config.getState('init', detail='config')
    config.factory.freezeConfig(writeConfig=writeConfig)

    # Give config help
    if showConfig or showMinimalConfig:
        config.write(sys.stdout, printDefault=showConfig, printUnused=False,
            printMinimal=showMinimalConfig, printSource=showConfig)
        sys.exit(os.EX_OK)

    # Check if user requested deletion / reset of jobs
    if deleteSelector:
        workflow.jobManager.delete(workflow.wms, deleteSelector)
        sys.exit(os.EX_OK)
    if resetSelector:
        workflow.jobManager.reset(workflow.wms, resetSelector)
        sys.exit(os.EX_OK)

    return workflow
def _getJobsOutput(self, ids):
    """Fetch job output directories by running ``self._outputExec``.

    ``ids`` is a sequence of pairs; ``dict(ids)`` maps the first element of
    each pair to the job number.  The generator yields
      * ``(jobNum, outputDir)`` for every output directory reported below
        the temporary path,
      * ``(None, path)`` for every entry of the temporary base directory
        after the retrieval command exited with a non-zero code,
      * ``(jobNum, None)`` for every job without a reported directory.

    Raises BackendError if the temporary directory cannot be created.
    """
    if len(ids) == 0:
        # A bare return ends the generator; "raise StopIteration" inside a
        # generator becomes a RuntimeError under PEP 479 (Python 3.7+)
        return

    basePath = os.path.join(self._outputPath, 'tmp')
    # Pre-set so that the error message below can always be formatted
    tmpPath = basePath
    try:
        if len(ids) == 1:
            # For single jobs create single subdir
            tmpPath = os.path.join(basePath, md5(ids[0][0]).hexdigest())
        utils.ensureDirExists(tmpPath)
    except Exception:
        raise BackendError('Temporary path "%s" could not be created.' % tmpPath, BackendError)

    jobNumMap = dict(ids)
    jobs = self.writeWMSIds(ids)

    activity = Activity('retrieving %d job outputs' % len(ids))
    proc = LocalProcess(self._outputExec, '--noint', '--logfile', '/dev/stderr', '-i', jobs, '--dir', tmpPath)

    # yield output dirs
    # Copy into a list: dict.values() is a view without remove() on Python 3
    todo = list(jobNumMap.values())
    currentJobNum = None
    for line in imap(str.strip, proc.stdout.iter(timeout = 60)):
        if line.startswith(tmpPath):
            # A directory line may arrive without a recognised job id before
            # it; do not fail on the removal in that case
            if currentJobNum in todo:
                todo.remove(currentJobNum)
            outputDir = line.strip()
            if os.path.exists(outputDir):
                if 'GC_WC.tar.gz' in os.listdir(outputDir):
                    wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
                    try:
                        tar = tarfile.TarFile.open(wildcardTar, 'r:gz')
                        try:
                            tar.extractall(outputDir)
                        finally:
                            # Release the archive handle before unlinking the file
                            tar.close()
                        os.unlink(wildcardTar)
                    except Exception:
                        self._log.error('Can\'t unpack output files contained in %s', wildcardTar)
            yield (currentJobNum, outputDir)
            currentJobNum = None
        else:
            # Lines that map to a known id select the job for the next directory line
            currentJobNum = jobNumMap.get(self._createId(line), currentJobNum)
    retCode = proc.status(timeout = 0, terminate = True)
    activity.finish()

    if retCode != 0:
        if 'Keyboard interrupt raised by user' in proc.stderr.read(timeout = 0):
            utils.removeFiles([jobs, basePath])
            return
        else:
            self._log.log_process(proc, files = {'jobs': SafeFile(jobs).read()})
        self._log.error('Trying to recover from error ...')
        for dirName in os.listdir(basePath):
            yield (None, os.path.join(basePath, dirName))

    # return unretrievable jobs
    for jobNum in todo:
        yield (jobNum, None)

    utils.removeFiles([jobs, basePath])
def saveToFile(path, dataBlocks, stripMetadata = False):
    """Write ``dataBlocks`` to the file at ``path`` via ``DataProvider.saveToStream``.

    The parent directory of ``path`` is ensured to exist when ``path`` has a
    directory component.  The file is closed even if writing fails.
    """
    targetDir = os.path.dirname(path)
    if targetDir:
        utils.ensureDirExists(targetDir, 'dataset cache directory')
    stream = open(path, 'w')
    try:
        # saveToStream is iterated to completion; the items it produces are discarded
        writer = DataProvider.saveToStream(stream, dataBlocks, stripMetadata)
        for _ in writer:
            pass
    finally:
        stream.close()
def saveToFile(path, dataBlocks, stripMetadata=False):
    """Write ``dataBlocks`` to the file at ``path`` via ``DataProvider.saveToStream``.

    The parent directory of ``path`` is ensured to exist when ``path`` has a
    directory component.  The file is closed even if writing fails.
    """
    targetDir = os.path.dirname(path)
    if targetDir:
        utils.ensureDirExists(targetDir, 'dataset cache directory')
    stream = open(path, 'w')
    try:
        # saveToStream is iterated to completion; the items it produces are discarded
        writer = DataProvider.saveToStream(stream, dataBlocks, stripMetadata)
        for _ in writer:
            pass
    finally:
        stream.close()
def __init__(self, config, name, checkExecutor, cancelExecutor):
    """Prepare executors, work-directory paths, the run library and plugins.

    ``checkExecutor`` and ``cancelExecutor`` are set up with this object's
    logger and stored.  The file 'gc-run.lib' in the work directory is
    created from the shared template when it does not exist.
    """
    WMS.__init__(self, config, name)
    for executor in [checkExecutor, cancelExecutor]:
        executor.setup(self._log)
    (self._check_executor, self._cancel_executor) = (checkExecutor, cancelExecutor)

    if self._name != self.__class__.__name__.upper():
        self._log.info('Using batch system: %s (%s)', self.__class__.__name__, self._name)
    else:
        self._log.info('Using batch system: %s', self._name)

    self.errorLog = config.getWorkPath('error.tar')
    self._runlib = config.getWorkPath('gc-run.lib')
    if not os.path.exists(self._runlib):
        # Render the template BEFORE the target is opened for writing: if
        # reading fails, no empty 'gc-run.lib' is left behind that would
        # pass the existence check above on the next start
        template = SafeFile(utils.pathShare('gc-run.lib')).read()
        content = template.replace('__GC_VERSION__', __import__('grid_control').__version__)
        fp = SafeFile(self._runlib, 'w')
        try:
            fp.write(content)
        finally:
            fp.close()
    self._outputPath = config.getWorkPath('output')
    self._filecachePath = config.getWorkPath('files')
    utils.ensureDirExists(self._outputPath, 'output directory')
    self._failPath = config.getWorkPath('fail')

    # Initialise access token and storage managers

    # UI -> SE -> WN
    self.smSEIn = config.getPlugin('se input manager', 'SEStorageManager',
        cls=StorageManager, tags=[self], pargs=('se', 'se input', 'SE_INPUT'))
    self.smSBIn = config.getPlugin('sb input manager', 'LocalSBStorageManager',
        cls=StorageManager, tags=[self], pargs=('sandbox', 'sandbox', 'SB_INPUT'))
    # UI <- SE <- WN
    self.smSEOut = config.getPlugin('se output manager', 'SEStorageManager',
        cls=StorageManager, tags=[self], pargs=('se', 'se output', 'SE_OUTPUT'))
    self.smSBOut = None

    self._token = config.getCompositePlugin(['proxy', 'access token'], 'TrivialAccessToken',
        'MultiAccessToken', cls=AccessToken, inherit=True, tags=[self])
def __init__(self, config, wmsName):
    """Prepare work-directory paths, the run library and the storage plugins.

    ``config`` supplies the work paths and plugin instances; ``wmsName`` is
    forwarded to ``WMS.__init__``.  The file 'gc-run.lib' in the work
    directory is created from the shared template when it does not exist.
    """
    WMS.__init__(self, config, wmsName)
    if self.wmsName != self.__class__.__name__.upper():
        utils.vprint('Using batch system: %s (%s)' % (self.__class__.__name__, self.wmsName), -1)
    else:
        utils.vprint('Using batch system: %s' % self.wmsName, -1)

    self.errorLog = config.getWorkPath('error.tar')
    self._runlib = config.getWorkPath('gc-run.lib')
    if not os.path.exists(self._runlib):
        # Render the template BEFORE the target is opened for writing: if
        # reading fails, no empty 'gc-run.lib' is left behind that would
        # pass the existence check above on the next start
        template = SafeFile(utils.pathShare('gc-run.lib')).read()
        content = template.replace('__GC_VERSION__', __import__('grid_control').__version__)
        fp = SafeFile(self._runlib, 'w')
        try:
            fp.write(content)
        finally:
            fp.close()
    self._outputPath = config.getWorkPath('output')
    utils.ensureDirExists(self._outputPath, 'output directory')
    self._failPath = config.getWorkPath('fail')

    # Initialise access token, broker and storage manager
    self._token = config.getCompositePlugin(['proxy', 'access token'], 'TrivialAccessToken',
        'MultiAccessToken', cls=AccessToken, inherit=True, tags=[self])

    # UI -> SE -> WN
    self.smSEIn = config.getPlugin('se input manager', 'SEStorageManager',
        cls=StorageManager, tags=[self], pargs=('se', 'se input', 'SE_INPUT'))
    self.smSBIn = config.getPlugin('sb input manager', 'LocalSBStorageManager',
        cls=StorageManager, tags=[self], pargs=('sandbox', 'sandbox', 'SB_INPUT'))
    # UI <- SE <- WN
    self.smSEOut = config.getPlugin('se output manager', 'SEStorageManager',
        cls=StorageManager, tags=[self], pargs=('se', 'se output', 'SE_OUTPUT'))
    self.smSBOut = None

    self.fileNamesEnvironment = config.getBool("file names environment", True, onChange=None)
def freezeConfig(self, writeConfig = True):
    """Mark the current container read-only, report unused options, store config.

    The two config files are written when ``writeConfig`` is true or when the
    file at ``self._oldCfgPath`` does not exist yet.
    """
    self._curContainer.setReadOnly()

    # Inform the user about unused options
    def isUnused(entry):
        return not (('!' in entry.section) or entry.accessed)

    unusedEntries = lfilter(isUnused, self._view.iterContent())
    freezeLog = logging.getLogger('config.freeze')
    if unusedEntries:
        freezeLog.log(logging.INFO1, 'There are %s unused config options!', len(unusedEntries))
    for unusedEntry in unusedEntries:
        freezeLog.log(logging.INFO1, '\t%s', unusedEntry.format(printSection = True))

    # Nothing to write when not requested and the stored config already exists
    if not writeConfig and os.path.exists(self._oldCfgPath):
        return

    ensureDirExists(os.path.dirname(self._oldCfgPath), 'config storage directory', ConfigError)
    # Write user friendly, flat config file and config file with saved settings
    self._write_file(self._flatCfgPath,
        printDefault = False, printUnused = False, printMinimal = True, printWorkdir = True)
    header = '; ==> DO NOT EDIT THIS FILE! <==\n; This file is used to find config changes!\n'
    self._write_file(self._oldCfgPath,
        printDefault = True, printUnused = True, printMinimal = True, printSource = True,
        message = header)
def gc_create_workflow(config):
    """Install signal handlers, prepare the work directory and build the workflow.

    Returns the workflow plugin created from the 'global' config view.  The
    process exits with ``os.EX_OK`` instead when config help was displayed or
    a delete / reset action was requested.
    """
    # set up signal handler for interrupts and debug session requests
    signal.signal(signal.SIGURG, handle_debug_interrupt)
    signal.signal(signal.SIGINT, handle_abort_interrupt)

    # Configure logging settings
    logging_setup(config.changeView(setSections = ['logging']))

    globalView = config.changeView(setSections = ['global'])
    # Check work dir validity (default work directory is the config file name)
    if not os.path.exists(globalView.getWorkPath()):
        if not globalView.getState('init'):
            logging.getLogger('user').warning('Starting initialization of %s!', globalView.getWorkPath())
            globalView.setState(True, 'init')
        question = 'Do you want to create the working directory %s?' % globalView.getWorkPath()
        if globalView.getChoiceYesNo('workdir create', True, interactive_msg = question):
            utils.ensureDirExists(globalView.getWorkPath(), 'work directory')
    for pluginPath in globalView.getPaths('package paths', []):
        init_hpf_plugins(pluginPath)

    # Query config settings before config is frozen
    showConfig = globalView.getState('display', detail = 'config')
    showMinimalConfig = globalView.getState('display', detail = 'minimal config')

    actionView = config.changeView(setSections = ['action'])
    deleteSelector = actionView.get('delete', '', onChange = None)
    resetSelector = actionView.get('reset', '', onChange = None)

    # Create workflow and freeze config settings
    workflow = globalView.getPlugin('workflow', 'Workflow:global', cls = 'Workflow')
    writeConfig = config.getState('init', detail = 'config')
    config.factory.freezeConfig(writeConfig = writeConfig)

    # Give config help
    if showConfig or showMinimalConfig:
        config.write(sys.stdout, printDefault = showConfig, printUnused = False,
            printMinimal = showMinimalConfig, printSource = showConfig)
        sys.exit(os.EX_OK)

    # Check if user requested deletion / reset of jobs
    if deleteSelector:
        workflow.jobManager.delete(workflow.wms, deleteSelector)
        sys.exit(os.EX_OK)
    if resetSelector:
        workflow.jobManager.reset(workflow.wms, resetSelector)
        sys.exit(os.EX_OK)

    return workflow
def __init__(self, config, source):
    """Set up the adapter and decide between reuse, resync and initialisation.

    Two files in the work directory, 'params.map.gz' and 'params.dat.gz',
    hold the persisted state (presumably the job-to-parameter mapping and a
    parameter dump, judging by the attribute names - confirm).  Depending on
    the 'init' / 'resync' config states and on whether the parameter hash
    differs from the stored one, the stored mapping is read back, a forced
    resync is performed, or the current state is written.
    """
    self._rawSource = source
    BasicParameterAdapter.__init__(self, config, source)
    self._mapJob2PID = {}
    utils.ensureDirExists(config.getWorkPath(), 'parameter storage directory', ParameterError)
    self._pathJob2PID = config.getWorkPath('params.map.gz')
    self._pathParams = config.getWorkPath('params.dat.gz')

    # Find out if init should be performed - overrides userResync!
    userInit = config.getState('init', detail = 'parameters')
    needInit = False
    if not (os.path.exists(self._pathParams) and os.path.exists(self._pathJob2PID)):
        needInit = True # Init needed if no parameter log exists
    # The user is only asked when init was requested although stored files exist
    if userInit and not needInit and (source.getMaxParameters() is not None):
        self._log.warning('Re-Initialization will overwrite the current mapping between jobs and parameter/dataset content! This can lead to invalid results!')
        if utils.getUserBool('Do you want to perform a syncronization between the current mapping and the new one to avoid this?', True):
            userInit = False
    doInit = userInit or needInit

    # Find out if resync should be performed
    userResync = config.getState('resync', detail = 'parameters')
    # The resync state is cleared right after it has been read
    config.setState(False, 'resync', detail = 'parameters')
    needResync = False
    pHash = self._rawSource.getHash()
    # The current hash serves as default when no hash is stored in the config
    self._storedHash = config.get('parameter hash', pHash, persistent = True)
    if self._storedHash != pHash:
        needResync = True # Resync needed if parameters have changed
        self._log.info('Parameter hash has changed')
        self._log.debug('\told hash: %s', self._storedHash)
        self._log.debug('\tnew hash: %s', pHash)
        config.setState(True, 'init', detail = 'config')
    # Initialisation takes precedence over a resync
    doResync = (userResync or needResync) and not doInit

    if not doResync and not doInit: # Reuse old mapping
        activity = Activity('Loading cached parameter information')
        self._readJob2PID()
        activity.finish()
        # Early exit: the parameter hash in the config is left untouched
        return
    elif doResync: # Perform sync
        self._storedHash = None
        self._resync_state = self.resync(force = True)
    elif doInit: # Write current state
        self._writeJob2PID(self._pathJob2PID)
        ParameterSource.getClass('GCDumpParameterSource').write(self._pathParams, self)
    # Reached after a resync or an init: store the hash of the raw source
    config.set('parameter hash', self._rawSource.getHash())
def _initSockets(self, **kwargs):
    """Initialise the socket bookkeeping attributes from keyword options.

    Options read (with defaults): ``needSocket`` (True), ``socketMinSec``
    (300), ``socketCount`` (2, never less than 2), ``socketMaxMiss`` (2) and
    ``socketDir``.  A non-empty ``socketDir`` shorter than 105 characters is
    used as given; otherwise a fresh temporary directory is created.
    """
    self._needSocket = kwargs.get("needSocket", True)
    self._socketMinSec = kwargs.get("socketMinSec", 300)
    requestedCount = kwargs.get("socketCount", 2)
    self._socketCount = max(2, requestedCount)
    self._socketIndex = 0
    self._socketMaxMiss = kwargs.get("socketMaxMiss", 2)
    self._socketMisses = 0

    # sockets should reside in secure, managed directory
    # NOTE(review): the 105 character bound presumably reflects the length
    # limit of socket paths - confirm
    requestedDir = kwargs.get("socketDir", "")
    if requestedDir and len(requestedDir) < 105:
        self._socketDir = requestedDir
        ensureDirExists(self._socketDir, name = "SSH connection socket container directory")
    else:
        self._socketDir = tempfile.mkdtemp()
    self._log(logging.DEBUG1, 'Using socket directoy %s' % self._socketDir)

    # create list of socket names and corresponding arguments to rotate through
    socketPaths = []
    socketArgs = []
    for socketIndex in irange(self._socketCount):
        socketPath = os.path.join(self._socketDir, str(socketIndex))
        socketPaths.append(socketPath)
        socketArgs.append(["-oControlMaster=auto", "-oControlPath=%s" % socketPath])
    self._socketList = socketPaths
    self._socketArgList = socketArgs
    self._socketProcs = {}
def __init__(self, config, wmsName):
    """Prepare work-directory paths, the access token and the storage managers.

    ``config`` supplies the work paths and plugins; ``wmsName`` is forwarded
    to ``WMS.__init__``.
    """
    WMS.__init__(self, config, wmsName)
    className = self.__class__.__name__
    if self.wmsName != className.upper():
        banner = 'Using batch system: %s (%s)' % (className, self.wmsName)
    else:
        banner = 'Using batch system: %s' % self.wmsName
    utils.vprint(banner, -1)

    self.errorLog = config.getWorkPath('error.tar')
    self._outputPath = config.getWorkPath('output')
    utils.ensureDirExists(self._outputPath, 'output directory')
    self._failPath = config.getWorkPath('fail')

    # Initialise access token, broker and storage manager
    tokenPlugin = config.getCompositePlugin(['access token', 'proxy'], 'TrivialAccessToken',
        'MultiAccessToken', cls = AccessToken, inherit = True, tags = [self])
    self._token = tokenPlugin.getInstance()

    def newStorageManager(option, default, *instanceArgs):
        # Look up the configured storage manager plugin and instantiate it
        plugin = config.getPlugin(option, default, cls = StorageManager, tags = [self])
        return plugin.getInstance(*instanceArgs)

    # UI -> SE -> WN
    self.smSEIn = newStorageManager('se input manager', 'SEStorageManager', 'se', 'se input', 'SE_INPUT')
    self.smSBIn = newStorageManager('sb input manager', 'LocalSBStorageManager', 'sandbox', 'sandbox', 'SB_INPUT')
    # UI <- SE <- WN
    self.smSEOut = newStorageManager('se output manager', 'SEStorageManager', 'se', 'se output', 'SE_OUTPUT')
    self.smSBOut = None
def deployTask(self, task, monitor, transferSE, transferSB):
    """Register the task's files with the storage managers and pack the sandbox.

    ``transferSE`` triggers ``doTransfer`` for the task's SE input files;
    ``transferSB`` forces the sandbox tarball to be regenerated even when it
    already exists.
    """
    def pickLast(entry):
        # Entries are indexed as triples; only the third element is used
        return entry[2]

    def toTarEntries(sources):
        # Plain path strings and objects carrying a ``name`` attribute are
        # both mapped onto the triples handed to utils.genTarball
        for src in sources:
            if not isinstance(src, str):
                yield (None, os.path.basename(src.name), src)
            else:
                yield (src, os.path.basename(src), False)

    self.outputFiles = lmap(pickLast, self._getSandboxFilesOut(task))  # HACK
    task.validateVariables()

    # add task SE files to SM
    seInputNames = lmap(pickLast, task.getSEInFiles())
    self.smSEIn.addFiles(seInputNames)
    # Transfer common SE files
    if transferSE:
        self.smSEIn.doTransfer(task.getSEInFiles())

    # Package sandbox tar file
    self._log.log(logging.INFO1, 'Packing sandbox')
    tarPath = self._getSandboxName(task)
    utils.ensureDirExists(os.path.dirname(tarPath), 'sandbox directory')
    tarMissing = not os.path.exists(tarPath)
    if tarMissing or transferSB:
        managers = [self.smSEIn, self.smSEOut]
        utils.genTarball(tarPath, toTarEntries(self._getSandboxFiles(task, monitor, managers)))
def __init__(self, config, wmsName, wmsClass):
    """Prepare work-directory paths, the proxy and the storage managers.

    ``config`` supplies the work paths and classes; ``wmsName`` and
    ``wmsClass`` are forwarded to ``WMS.__init__``.
    """
    WMS.__init__(self, config, wmsName, wmsClass)
    className = self.__class__.__name__
    if self.wmsName != className.upper():
        banner = 'Using batch system: %s (%s)' % (className, self.wmsName)
    else:
        banner = 'Using batch system: %s' % self.wmsName
    utils.vprint(banner, -1)

    self.errorLog = config.getWorkPath('error.tar')
    self._outputPath = config.getWorkPath('output')
    utils.ensureDirExists(self._outputPath, 'output directory')
    self._failPath = config.getWorkPath('fail')

    # Initialise proxy, broker and storage manager
    proxyFactory = ClassFactory(config, ('proxy', 'TrivialProxy'), ('proxy manager', 'MultiProxy'),
        cls = Proxy, tags = [self])
    self.proxy = proxyFactory.getInstance()

    def newStorageManager(option, default, *instanceArgs):
        # Look up the configured storage manager class and instantiate it
        wrapper = config.getClass(option, default, cls = StorageManager, tags = [self])
        return wrapper.getInstance(*instanceArgs)

    # UI -> SE -> WN
    self.smSEIn = newStorageManager('se input manager', 'SEStorageManager', 'se', 'se input', 'SE_INPUT')
    self.smSBIn = newStorageManager('sb input manager', 'LocalSBStorageManager', 'sandbox', 'sandbox', 'SB_INPUT')
    # UI <- SE <- WN
    self.smSEOut = newStorageManager('se output manager', 'SEStorageManager', 'se', 'se output', 'SE_OUTPUT')
    self.smSBOut = None
def deployTask(self, module, monitor):
    """Register the module's files with the storage managers and pack the sandbox.

    The third element of every triple returned by ``_getSandboxFilesOut`` and
    ``module.getSEInFiles`` is recorded, ``doTransfer`` is invoked when the
    config state with detail 'storage' is set, and the sandbox tarball is
    generated when it does not exist yet or the config state with detail
    'sandbox' is set.
    """
    # Unpack the triples in a comprehension: tuple parameters in lambdas
    # ("lambda (d, s, t): t") were removed by PEP 3113, and a comprehension
    # yields a real list on both Python 2 and Python 3
    self.outputFiles = [t for (d, s, t) in self._getSandboxFilesOut(module)]  # HACK
    module.validateVariables()

    # add module SE files to SM
    self.smSEIn.addFiles([t for (d, s, t) in module.getSEInFiles()])
    # Transfer common SE files
    if self.config.getState(detail = 'storage'):
        self.smSEIn.doTransfer(module.getSEInFiles())

    def convert(fnList):
        # Plain path strings and objects carrying a ``name`` attribute are
        # both mapped onto the triples handed to utils.genTarball
        for fn in fnList:
            if isinstance(fn, str):
                yield (fn, os.path.basename(fn), False)
            else:
                yield (None, os.path.basename(fn.name), fn)

    # Package sandbox tar file
    utils.vprint('Packing sandbox:')
    sandbox = self._getSandboxName(module)
    utils.ensureDirExists(os.path.dirname(sandbox), 'sandbox directory')
    if not os.path.exists(sandbox) or self.config.getState(detail = 'sandbox'):
        utils.genTarball(sandbox, convert(self._getSandboxFiles(module, monitor, [self.smSEIn, self.smSEOut])))
def __init__(self, config, name, checkExecutor, cancelExecutor):
    """Prepare executors, work-directory paths, the run library and plugins.

    ``checkExecutor`` and ``cancelExecutor`` are set up with this object's
    logger and stored.  The file 'gc-run.lib' in the work directory is
    created from the shared template when it does not exist.
    """
    WMS.__init__(self, config, name)
    for executor in [checkExecutor, cancelExecutor]:
        executor.setup(self._log)
    (self._check_executor, self._cancel_executor) = (checkExecutor, cancelExecutor)

    if self._name != self.__class__.__name__.upper():
        self._log.info('Using batch system: %s (%s)', self.__class__.__name__, self._name)
    else:
        self._log.info('Using batch system: %s', self._name)

    self.errorLog = config.getWorkPath('error.tar')
    self._runlib = config.getWorkPath('gc-run.lib')
    if not os.path.exists(self._runlib):
        # Render the template BEFORE the target is opened for writing: if
        # reading fails, no empty 'gc-run.lib' is left behind that would
        # pass the existence check above on the next start
        template = SafeFile(utils.pathShare('gc-run.lib')).read()
        content = template.replace('__GC_VERSION__', __import__('grid_control').__version__)
        fp = SafeFile(self._runlib, 'w')
        try:
            fp.write(content)
        finally:
            fp.close()
    self._outputPath = config.getWorkPath('output')
    self._filecachePath = config.getWorkPath('files')
    utils.ensureDirExists(self._outputPath, 'output directory')
    self._failPath = config.getWorkPath('fail')

    # Initialise access token and storage managers

    # UI -> SE -> WN
    self.smSEIn = config.getPlugin('se input manager', 'SEStorageManager',
        cls = StorageManager, tags = [self], pargs = ('se', 'se input', 'SE_INPUT'))
    self.smSBIn = config.getPlugin('sb input manager', 'LocalSBStorageManager',
        cls = StorageManager, tags = [self], pargs = ('sandbox', 'sandbox', 'SB_INPUT'))
    # UI <- SE <- WN
    self.smSEOut = config.getPlugin('se output manager', 'SEStorageManager',
        cls = StorageManager, tags = [self], pargs = ('se', 'se output', 'SE_OUTPUT'))
    self.smSBOut = None

    self._token = config.getCompositePlugin(['proxy', 'access token'], 'TrivialAccessToken',
        'MultiAccessToken', cls = AccessToken, inherit = True, tags = [self])
def _secureLinkDirectory(self, sshLink, enforce = True):
    """Ensure the directory of ``sshLink`` exists and restrict its permissions.

    Returns True on success.  On failure the outcome depends on
    ``self.socketEnforce``: an exception is raised when it is set, otherwise
    False is returned.  The ``enforce`` parameter is accepted but not read by
    this method.
    """
    try:
        linkDir = ensureDirExists(os.path.dirname(sshLink), 'SSH link direcory', BackendError)
    except Exception:
        if self.socketEnforce:
            raise
        return False

    # The directory that holds "~/.ssh/" after user expansion keeps its permissions
    userSshDir = os.path.dirname(os.path.expanduser("~/.ssh/"))
    if linkDir != userSshDir:
        try:
            # Owner-only read/write/execute
            os.chmod(linkDir, stat.S_IRWXU)
        except Exception:
            if not self.socketEnforce:
                return False
            raise BackendError("Could not secure directory for SSHLink:\n %s" % linkDir)
    return True
def retrieveJobs(self, ids):  # Process output sandboxes returned by getJobsOutput
    """Yield ``(jobNum, exitCode, data, path)`` for every retrieved job output.

    The pairs produced by ``self._getJobsOutput`` are handled as follows:
      * ``(jobNum, None)``  - job could not be retrieved; ``(jobNum, -1, {},
        None)`` is yielded unless the job was already retrieved successfully.
      * ``(None, path)``    - leftovers of a job retrieval; skipped.
      * ``(jobNum, path)``  - 'job.info' is parsed and the directory moved
        below ``self._outputPath``; unparsable outputs are moved below
        ``self._failPath`` and reported with exit code -1.

    Raises BackendError when the parsed job number differs from the expected one.
    """
    log = logging.getLogger('wms')

    # Function to force moving a directory
    def forceMove(source, target):
        # EnvironmentError is the common base of IOError, OSError and
        # shutil.Error on Python 2 and an alias of OSError on Python 3, so
        # failures of rmtree/move are reported instead of propagating
        try:
            if os.path.exists(target):
                shutil.rmtree(target)
        except EnvironmentError:
            log.exception('%r cannot be removed', target)
            return False
        try:
            shutil.move(source, target)
        except EnvironmentError:
            log.exception('Error moving job output directory from %r to %r', source, target)
            return False
        return True

    retrievedJobs = []

    for inJobNum, pathName in self._getJobsOutput(ids):
        # inJobNum != None, pathName == None => Job could not be retrieved
        if pathName is None:
            if inJobNum not in retrievedJobs:
                yield (inJobNum, -1, {}, None)
            continue

        # inJobNum == None, pathName != None => Found leftovers of job retrieval
        if inJobNum is None:
            continue

        # inJobNum != None, pathName != None => Job retrieval from WMS was ok
        jobFile = os.path.join(pathName, 'job.info')
        jobInfo = WMS.parseJobInfo(jobFile)
        if jobInfo:
            (jobNum, jobExitCode, jobData) = jobInfo
            if jobNum != inJobNum:
                raise BackendError('Invalid job id in job file %s' % jobFile)
            if forceMove(pathName, os.path.join(self._outputPath, 'job_%d' % jobNum)):
                retrievedJobs.append(inJobNum)
                yield (jobNum, jobExitCode, jobData, pathName)
            else:
                yield (jobNum, -1, {}, None)
            continue

        # Clean empty pathNames
        for subDir in imap(lambda x: x[0], os.walk(pathName, topdown=False)):
            try:
                os.rmdir(subDir)
            except OSError:
                # Best effort: non-empty directories are expected to fail here
                pass

        if os.path.exists(pathName):
            # Preserve failed job
            utils.ensureDirExists(self._failPath, 'failed output directory')
            forceMove(pathName, os.path.join(self._failPath, os.path.basename(pathName)))

        yield (inJobNum, -1, {}, None)
def getSandboxPath(self, jobNum=''):
    """Return the result of ``utils.ensureDirExists`` for the job's sandbox path.

    The path is ``self.sandPath`` joined with ``str(jobNum)``; with the
    default empty ``jobNum`` it is based on ``self.sandPath`` alone.
    """
    # The empty last component makes os.path.join end the path with a separator
    components = (self.sandPath, str(jobNum), '')
    jobSandbox = os.path.join(*components)
    return utils.ensureDirExists(jobSandbox, 'sandbox directory', BackendError)
def _getJobsOutput(self, allIds): if len(allIds) == 0: raise StopIteration basePath = os.path.join(self._outputPath, 'tmp') try: if len(allIds) == 1: # For single jobs create single subdir basePath = os.path.join(basePath, md5(allIds[0][0]).hexdigest()) utils.ensureDirExists(basePath) except Exception: raise BackendError('Temporary path "%s" could not be created.' % basePath, BackendError) activity = utils.ActivityLog('retrieving job outputs') for ids in imap(lambda x: allIds[x:x+self._nJobsPerChunk], irange(0, len(allIds), self._nJobsPerChunk)): jobNumMap = dict(ids) jobs = ' '.join(self._getRawIDs(ids)) log = tempfile.mktemp('.log') #print self._outputExec, '--noint --logfile "%s" --dir "%s" %s' % (log, basePath, jobs) #import sys #sys.exit(1) proc = utils.LoggedProcess(self._outputExec, '--noint --logfile "%s" --dir "%s" %s' % (log, basePath, jobs)) # yield output dirs todo = jobNumMap.values() done = [] currentJobNum = None for line in imap(str.strip, proc.iter()): match = re.match(self._outputRegex, line) if match: currentJobNum = jobNumMap.get(self._createId(match.groupdict()['rawId'])) todo.remove(currentJobNum) done.append(match.groupdict()['rawId']) outputDir = match.groupdict()['outputDir'] if os.path.exists(outputDir): if 'GC_WC.tar.gz' in os.listdir(outputDir): wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz') try: tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir) os.unlink(wildcardTar) except Exception: utils.eprint("Can't unpack output files contained in %s" % wildcardTar) yield (currentJobNum, outputDir) currentJobNum = None retCode = proc.wait() if retCode != 0: if 'Keyboard interrupt raised by user' in proc.getError(): utils.removeFiles([log, basePath]) raise StopIteration else: proc.logError(self.errorLog, log = log) utils.eprint('Trying to recover from error ...') for dirName in os.listdir(basePath): yield (None, os.path.join(basePath, dirName)) del activity # return unretrievable jobs for jobNum in todo: yield (jobNum, None) 
purgeLog = tempfile.mktemp('.log') purgeProc = utils.LoggedProcess(self._purgeExec, '--noint --logfile "%s" %s' % (purgeLog, " ".join(done))) retCode = purgeProc.wait() if retCode != 0: if self.explainError(purgeProc, retCode): pass else: proc.logError(self.errorLog, log = purgeLog, jobs = done) utils.removeFiles([log, purgeLog, basePath])
def retrieveJobs(self, gcID_jobNum_List):
    """Process the output sandboxes reported by ``self._getJobsOutput``.

    Generator yielding ``(jobNum, exitCode, rawInfo, path)`` tuples:

    * ``(inJobNum, -1, {}, None)`` when no output path was reported for a job
      that has not been retrieved already, or when ``self._job_parser`` could
      not process the output directory (what is left of the directory is then
      moved below ``self._failPath``);
    * ``(jobNum, job_info[JobResult.EXITCODE], job_info[JobResult.RAW],
      pathName)`` once the output directory was moved to
      ``<self._outputPath>/job_<jobNum>``;
    * ``(jobNum, -1, {}, None)`` when that move failed.

    Raises BackendError when the parsed job number differs from the job
    number the output was reported for.
    """
    def forceMove(source, target):
        """Move ``source`` to ``target``, replacing an existing ``target``.

        Returns True on success, False (after logging) on failure.
        """
        # shutil.rmtree / shutil.move report failures as OSError or
        # shutil.Error, not only IOError; EnvironmentError is the common
        # base class on Python 2 and an alias of OSError on Python 3.
        try:
            if os.path.exists(target):
                shutil.rmtree(target)
        except EnvironmentError:
            self._log.exception('%r cannot be removed', target)
            return False
        try:
            shutil.move(source, target)
        except EnvironmentError:
            self._log.exception('Error moving job output directory from %r to %r', source, target)
            return False
        return True

    retrievedJobs = []
    for inJobNum, pathName in self._getJobsOutput(gcID_jobNum_List):
        # inJobNum != None, pathName == None => Job could not be retrieved
        if pathName is None:
            if inJobNum not in retrievedJobs:
                yield (inJobNum, -1, {}, None)
            continue

        # inJobNum == None, pathName != None => Found leftovers of job retrieval
        if inJobNum is None:
            continue

        # inJobNum != None, pathName != None => Job retrieval from WMS was ok
        jobFile = os.path.join(pathName, 'job.info')  # only used in the error message
        try:
            job_info = self._job_parser.process(pathName)
        except Exception:
            self._log.exception('Unable to parse job.info')
            job_info = None
        if job_info:
            jobNum = job_info[JobResult.JOBNUM]
            if jobNum != inJobNum:
                raise BackendError('Invalid job id in job file %s' % jobFile)
            if forceMove(pathName, os.path.join(self._outputPath, 'job_%d' % jobNum)):
                retrievedJobs.append(inJobNum)
                yield (jobNum, job_info[JobResult.EXITCODE], job_info[JobResult.RAW], pathName)
            else:
                yield (jobNum, -1, {}, None)
            continue

        # Clean empty pathNames (bottom-up, so emptied parents can go as well)
        for subDir in imap(lambda x: x[0], os.walk(pathName, topdown=False)):
            try:
                os.rmdir(subDir)
            except Exception:
                # Best effort: directories that cannot be removed are kept
                clear_current_exception()

        if os.path.exists(pathName):
            # Preserve failed job
            utils.ensureDirExists(self._failPath, 'failed output directory')
            forceMove(pathName, os.path.join(self._failPath, os.path.basename(pathName)))

        yield (inJobNum, -1, {}, None)
yield (jobNum, -1, {}) continue except: # Something went wrong utils.eprint('Warning: "%s" seems broken.' % info) # Clean empty dirs for subDir in map(lambda x: x[0], os.walk(dir, topdown=False)): try: os.rmdir(subDir) except: pass if os.path.exists(dir): # Preserve failed job utils.ensureDirExists(self._failPath, 'failed output directory') forceMove(dir, os.path.join(self._failPath, os.path.basename(dir))) yield (inJobNum, -1, {}) def _getSandboxName(self, module): return self.config.getWorkPath('files', module.taskID, self.wmsName, 'gc-sandbox.tar.gz') def _getSandboxFilesIn(self, module): return [ ('GC Runtime', utils.pathShare('gc-run.sh'), 'gc-run.sh'), ('GC Runtime library', utils.pathShare('gc-run.lib'), 'gc-run.lib'), ('GC Sandbox', self._getSandboxName(module), 'gc-sandbox.tar.gz'), ]
def _getJobsOutput(self, ids): if len(ids) == 0: raise StopIteration basePath = os.path.join(self._outputPath, 'tmp') try: if len(ids) == 1: # For single jobs create single subdir tmpPath = os.path.join(basePath, md5(ids[0][0]).hexdigest()) else: tmpPath = basePath utils.ensureDirExists(tmpPath) except Exception: raise BackendError('Temporary path "%s" could not be created.' % tmpPath, RuntimeError) jobNumMap = dict(ids) jobs = self.writeWMSIds(ids) log = tempfile.mktemp('.log') activity = utils.ActivityLog('retrieving job outputs') proc = utils.LoggedProcess(self._outputExec, '--noint --logfile "%s" -i "%s" --dir "%s"' % (log, jobs, tmpPath)) # yield output dirs todo = jobNumMap.values() currentJobNum = None for line in map(str.strip, proc.iter()): if line.startswith(tmpPath): todo.remove(currentJobNum) outputDir = line.strip() if os.path.exists(outputDir): if 'GC_WC.tar.gz' in os.listdir(outputDir): wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz') try: tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir) os.unlink(wildcardTar) except Exception: utils.eprint("Can't unpack output files contained in %s" % wildcardTar) pass yield (currentJobNum, line.strip()) currentJobNum = None else: currentJobNum = jobNumMap.get(self._createId(line), currentJobNum) retCode = proc.wait() del activity if retCode != 0: if 'Keyboard interrupt raised by user' in proc.getError(): utils.removeFiles([log, jobs, basePath]) raise StopIteration else: proc.logError(self.errorLog, log = log) utils.eprint('Trying to recover from error ...') for dirName in os.listdir(basePath): yield (None, os.path.join(basePath, dirName)) # return unretrievable jobs for jobNum in todo: yield (jobNum, None) utils.removeFiles([log, jobs, basePath])
def getSandboxPath(self, subdirToken=""):
    """Return the sandbox directory for ``subdirToken`` below ``self._sandboxDir``.

    The path is handed to ``utils.ensureDirExists`` (which is given
    BackendError as its exception type) and that call's result is returned.
    """
    # The empty last component makes the joined path end with a separator
    pathParts = (self._sandboxDir, str(subdirToken), '')
    return utils.ensureDirExists(os.path.join(*pathParts), 'sandbox directory', BackendError)
def getSandboxPath(self, subdirToken=""):
    """Build ``<self._sandboxDir>/<subdirToken>/`` and pass it to ``utils.ensureDirExists``.

    Returns whatever ``utils.ensureDirExists`` returns; BackendError is
    passed along as the exception type for that helper.
    """
    tokenName = str(subdirToken)
    # Joining with '' leaves a trailing path separator on the result
    sandboxDir = os.path.join(self._sandboxDir, tokenName, '')
    return utils.ensureDirExists(sandboxDir, 'sandbox directory', BackendError)
def _getJobsOutput(self, ids): if len(ids) == 0: raise StopIteration basePath = os.path.join(self._outputPath, 'tmp') try: if len(ids) == 1: # For single jobs create single subdir tmpPath = os.path.join(basePath, md5(ids[0][0]).hexdigest()) else: tmpPath = basePath utils.ensureDirExists(tmpPath) except Exception: raise BackendError( 'Temporary path "%s" could not be created.' % tmpPath, BackendError) jobNumMap = dict(ids) jobs = self.writeWMSIds(ids) activity = Activity('retrieving %d job outputs' % len(ids)) proc = LocalProcess(self._outputExec, '--noint', '--logfile', '/dev/stderr', '-i', jobs, '--dir', tmpPath) # yield output dirs todo = jobNumMap.values() currentJobNum = None for line in imap(str.strip, proc.stdout.iter(timeout=60)): if line.startswith(tmpPath): todo.remove(currentJobNum) outputDir = line.strip() if os.path.exists(outputDir): if 'GC_WC.tar.gz' in os.listdir(outputDir): wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz') try: tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir) os.unlink(wildcardTar) except Exception: self._log.error( 'Can\'t unpack output files contained in %s', wildcardTar) yield (currentJobNum, line.strip()) currentJobNum = None else: currentJobNum = jobNumMap.get(self._createId(line), currentJobNum) retCode = proc.status(timeout=0, terminate=True) activity.finish() if retCode != 0: if 'Keyboard interrupt raised by user' in proc.stderr.read( timeout=0): utils.removeFiles([jobs, basePath]) raise StopIteration else: self._log.log_process(proc, files={'jobs': SafeFile(jobs).read()}) self._log.error('Trying to recover from error ...') for dirName in os.listdir(basePath): yield (None, os.path.join(basePath, dirName)) # return unretrievable jobs for jobNum in todo: yield (jobNum, None) utils.removeFiles([jobs, basePath])
def getSandboxPath(self, jobNum=''):
    """Build ``<self.sandPath>/<jobNum>/`` and pass it to ``utils.ensureDirExists``.

    Returns whatever ``utils.ensureDirExists`` returns; BackendError is
    passed along as the exception type for that helper.
    """
    jobToken = str(jobNum)
    # Joining with '' leaves a trailing path separator on the result
    sandboxDir = os.path.join(self.sandPath, jobToken, '')
    return utils.ensureDirExists(sandboxDir, 'sandbox directory', BackendError)
def __init__(self, config):
    """Set up an empty cache and the sandbox base path.

    The base path is read from the 'sandbox path' option of ``config``; its
    default is the 'sandbox' entry of the work path and it is not required
    to exist beforehand.  The resulting path is then passed to
    ``utils.ensureDirExists`` with BackendError as the exception type.
    """
    self._cache = []
    defaultBase = config.getWorkPath('sandbox')
    self._path = config.getPath('sandbox path', defaultBase, mustExist = False)
    utils.ensureDirExists(self._path, 'sandbox base', BackendError)