Example #1
	def _readJobs(self, jobLimit):
		utils.ensureDirExists(self._dbPath, 'job database directory', JobError)

		candidates = []
		for jobFile in fnmatch.filter(os.listdir(self._dbPath), 'job_*.txt'):
			try: # 2xsplit is faster than regex
				jobNum = int(jobFile.split(".")[0].split("_")[1])
			except Exception:
				continue
			candidates.append((jobNum, jobFile))

		(jobMap, maxJobs) = ({}, len(candidates))
		activity = Activity('Reading job infos')
		idx = 0
		for (jobNum, jobFile) in sorted(candidates):
			idx += 1
			if (jobLimit >= 0) and (jobNum >= jobLimit):
				self._log.info('Stopped reading job infos at job #%d out of %d available job files, since the limit of %d jobs is reached',
					jobNum, len(candidates), jobLimit)
				break
			jobObj = self._load_job(os.path.join(self._dbPath, jobFile))
			jobMap[jobNum] = jobObj
			if idx % 100 == 0:
				activity.update('Reading job infos %d [%d%%]' % (idx, (100.0 * idx) / maxJobs))
		activity.finish()
		return jobMap
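
All of the examples on this page share the same progress-reporting pattern: construct an Activity with a label, call update() periodically (often throttled to every 100th iteration), and call finish() once the work is done. Below is a minimal sketch of that pattern; the import path and the process_items/handle_item names are illustrative assumptions, not part of the examples themselves.

from grid_control.utils.activity import Activity  # import path is an assumption

def process_items(items, handle_item):
	# Open a progress activity with a descriptive label
	activity = Activity('Processing items')
	for idx, item in enumerate(items):
		if idx % 100 == 0:  # throttle terminal updates, as Example #1 does
			activity.update('Processing items %d [%d%%]' % (idx, (100.0 * idx) / len(items)))
		handle_item(item)
	# Always close the activity so the progress line is cleared
	activity.finish()
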
Example #2
	def _read_jobs(self, job_limit):
		ensure_dir_exists(self._path_db, 'job database directory', JobError)

		candidates = []
		for job_fn in fnmatch.filter(os.listdir(self._path_db), 'job_*.txt'):
			try:  # 2xsplit is faster than regex
				jobnum = int(job_fn.split(".")[0].split("_")[1])
			except Exception:
				clear_current_exception()
				continue
			candidates.append((jobnum, job_fn))

		(job_map, max_job_len) = ({}, len(candidates))
		activity = Activity('Reading job infos')
		idx = 0
		for (jobnum, job_fn) in sorted(candidates):
			idx += 1
			if jobnum >= job_limit >= 0:
				self._log.info('Stopped reading job infos at job #%d out of %d available job files, ' +
					'since the limit of %d jobs is reached', jobnum, len(candidates), job_limit)
				break
			try:
				job_fn_full = os.path.join(self._path_db, job_fn)
				data = self._fmt.parse(SafeFile(job_fn_full).iter_close())
				job_obj = self._create_job_obj(job_fn_full, data)
			except Exception:
				raise JobError('Unable to process job file %r' % job_fn_full)
			job_map[jobnum] = job_obj
			activity.update('Reading job infos %d [%d%%]' % (idx, (100.0 * idx) / max_job_len))
		activity.finish()
		return job_map
Example #3
def create_tarball(match_info_iter, **kwargs):
    tar = tarfile.open(mode='w:gz', **kwargs)
    activity = Activity('Generating tarball')
    for match_info in match_info_iter:
        if isinstance(match_info, tuple):
            (path_source, path_target) = match_info
        else:
            (path_source, path_target) = (match_info, None)
        if isinstance(path_source, str):
            if not os.path.exists(path_source):
                raise PathError('File %s does not exist!' % path_source)
            tar.add(path_source,
                    path_target or os.path.basename(path_source),
                    recursive=False)
        elif path_source is None:  # Update activity
            activity.update('Generating tarball: %s' % path_target)
        else:  # File handle
            info, handle = path_source.get_tar_info()
            if path_target:
                info.name = path_target
            info.mtime = time.time()
            info.mode = stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP + stat.S_IROTH
            if info.name.endswith('.sh') or info.name.endswith('.py'):
                info.mode += stat.S_IXUSR + stat.S_IXGRP + stat.S_IXOTH
            tar.addfile(info, handle)
            handle.close()
    activity.finish()
    tar.close()
Example #4
def create_tarball(match_info_iter, **kwargs):
	tar = tarfile.open(mode='w:gz', **kwargs)
	activity = Activity('Generating tarball')
	for match_info in match_info_iter:
		if isinstance(match_info, tuple):
			(path_source, path_target) = match_info
		else:
			(path_source, path_target) = (match_info, None)
		if isinstance(path_source, str):
			if not os.path.exists(path_source):
				raise PathError('File %s does not exist!' % path_source)
			tar.add(path_source, path_target or os.path.basename(path_source), recursive=False)
		elif path_source is None:  # Update activity
			activity.update('Generating tarball: %s' % path_target)
		else:  # File handle
			info, handle = path_source.get_tar_info()
			if path_target:
				info.name = path_target
			info.mtime = time.time()
			info.mode = stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP + stat.S_IROTH
			if info.name.endswith('.sh') or info.name.endswith('.py'):
				info.mode += stat.S_IXUSR + stat.S_IXGRP + stat.S_IXOTH
			tar.addfile(info, handle)
			handle.close()
	activity.finish()
	tar.close()
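
Both create_tarball variants forward their keyword arguments to tarfile.open, so the archive path is supplied as name=...; entries may be plain paths, (source, target) tuples, (None, label) activity updates, or objects exposing get_tar_info(). A hedged call sketch follows; the file names are placeholders, not paths from the examples.

create_tarball([
	('scripts/run.sh', 'run.sh'),  # (source, target) tuple stores the file under a new name
	'data/input.txt',              # a bare path keeps its basename
	(None, 'data/input.txt'),      # a None source only refreshes the activity message
], name='sandbox.tar.gz')          # kwargs such as name are passed straight to tarfile.open
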
Example #5
    def _read_jobs(self, job_limit):
        ensure_dir_exists(self._path_db, 'job database directory', JobError)

        candidates = []
        for job_fn in fnmatch.filter(os.listdir(self._path_db), 'job_*.txt'):
            try:  # 2xsplit is faster than regex
                jobnum = int(job_fn.split(".")[0].split("_")[1])
            except Exception:
                clear_current_exception()
                continue
            candidates.append((jobnum, job_fn))

        (job_map, max_job_len) = ({}, len(candidates))
        activity = Activity('Reading job infos')
        idx = 0
        for (jobnum, job_fn) in sorted(candidates):
            idx += 1
            if jobnum >= job_limit >= 0:
                self._log.info(
                    'Stopped reading job infos at job #%d out of %d available job files, '
                    + 'since the limit of %d jobs is reached', jobnum,
                    len(candidates), job_limit)
                break
            try:
                job_fn_full = os.path.join(self._path_db, job_fn)
                data = self._fmt.parse(SafeFile(job_fn_full).iter_close())
                job_obj = self._create_job_obj(job_fn_full, data)
            except Exception:
                raise JobError('Unable to process job file %r' % job_fn_full)
            job_map[jobnum] = job_obj
            activity.update('Reading job infos %d [%d%%]' %
                            (idx, (100.0 * idx) / max_job_len))
        activity.finish()
        return job_map
Example #6
	def _read_jobs(self, job_limit):
		job_map = {}
		max_job_len = 0
		if os.path.exists(self._db_fn):
			try:
				tar = zipfile.ZipFile(self._db_fn, 'r', zipfile.ZIP_DEFLATED)
				tar.testzip()
			except Exception:  # Try to recover job archive
				clear_current_exception()
				self._log.warning('Job database is corrupted - starting recovery')
				self._recover_jobs()
				self._log.info('Recover completed!')
				tar = zipfile.ZipFile(self._db_fn, 'r', zipfile.ZIP_DEFLATED)  # re-open the recovered archive before reading from it
			activity = Activity('Reading job transactions')
			max_job_len = len(tar.namelist())
			map_jobnum2tarfn = {}
			for idx, tar_info_fn in enumerate(tar.namelist()):
				(jobnum, tid) = tuple(imap(lambda s: int(s[1:]), tar_info_fn.split('_', 1)))
				if tid < map_jobnum2tarfn.get(jobnum, 0):
					continue
				try:
					data = self._fmt.parse(tar.open(tar_info_fn).read())
				except Exception:
					clear_current_exception()
					continue
				job_map[jobnum] = self._create_job_obj(tar_info_fn, data)
				map_jobnum2tarfn[jobnum] = tid
				if idx % 100 == 0:
					activity.update('Reading job transactions %d [%d%%]' % (idx, (100.0 * idx) / max_job_len))
			activity.finish()
		self._serial = max_job_len
		return job_map
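
The two-field parsing in the transaction loop above, int(s[1:]) applied to both halves of split('_', 1), implies archive entry names of the form <letter><job number>_<letter><transaction id>; the exact prefix letters used here (e.g. 'J17_T3') are an assumption. A small stand-alone sketch of that parsing:

def parse_entry_name(entry_name):
	# Strip the single-letter prefix from each part and convert to int,
	# mirroring the imap(lambda s: int(s[1:]), ...) expression above
	jobnum, tid = (int(part[1:]) for part in entry_name.split('_', 1))
	return jobnum, tid

assert parse_entry_name('J17_T3') == (17, 3)  # 'J17_T3' is a hypothetical entry name
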
Example #7
    def _readJobs(self, jobLimit):
        utils.ensureDirExists(self._dbPath, 'job database directory', JobError)

        candidates = []
        for jobFile in fnmatch.filter(os.listdir(self._dbPath), 'job_*.txt'):
            try:  # 2xsplit is faster than regex
                jobNum = int(jobFile.split(".")[0].split("_")[1])
            except Exception:
                continue
            candidates.append((jobNum, jobFile))

        (jobMap, maxJobs) = ({}, len(candidates))
        activity = Activity('Reading job infos')
        idx = 0
        for (jobNum, jobFile) in sorted(candidates):
            idx += 1
            if (jobLimit >= 0) and (jobNum >= jobLimit):
                self._log.info(
                    'Stopped reading job infos at job #%d out of %d available job files, since the limit of %d jobs is reached',
                    jobNum, len(candidates), jobLimit)
                break
            jobObj = self._load_job(os.path.join(self._dbPath, jobFile))
            jobMap[jobNum] = jobObj
            if idx % 100 == 0:
                activity.update('Reading job infos %d [%d%%]' %
                                (idx, (100.0 * idx) / maxJobs))
        activity.finish()
        return jobMap
Example #8
	def _saveStateToTar(self, tar, meta, source, sourceLen, message):
		# Write the splitting info grouped into subtarfiles
		activity = Activity(message)
		(jobNum, lastValid, subTar) = (-1, -1, None)
		for jobNum, entry in enumerate(source):
			if not entry.get(DataSplitter.Invalid, False):
				lastValid = jobNum
			if jobNum % self._keySize == 0:
				self._closeSubTar(tar, subTar)
				subTar = self._createSubTar('%03dXX.tgz' % int(jobNum / self._keySize))
				activity.update('%s [%d / %d]' % (message, jobNum, sourceLen))
			# Determine shortest way to store file list
			tmp = entry.pop(DataSplitter.FileList)
			savelist = self._getReducedFileList(entry, tmp) # can modify entry
			# Write files with infos / filelist
			data = str.join('', self._fmt.format(entry, fkt = self._formatFileEntry) + lmap(lambda fn: '=%s\n' % fn, savelist))
			self._addToSubTar(subTar, '%05d' % jobNum, data)
			# Remove common prefix from info
			if DataSplitter.CommonPrefix in entry:
				entry.pop(DataSplitter.CommonPrefix)
			entry[DataSplitter.FileList] = tmp
		self._closeSubTar(tar, subTar)
		activity.finish()
		# Write metadata to allow reconstruction of data splitter
		meta['MaxJobs'] = lastValid + 1
		for (fn, data) in [('Metadata', self._fmt.format(meta)), ('Version', '2')]:
			self._addToTar(tar, fn, data)
Example #9
def wait(timeout):
    activity = Activity('Waiting', parent='root')
    for remaining in irange(timeout, 0, -1):
        if abort():
            return False
        if (remaining == timeout) or (remaining < 5) or (remaining % 5 == 0):
            activity.update('Waiting for %d seconds' % remaining)
        time.sleep(1)
    activity.finish()
    return True
Example #10
def wait(timeout):
	activity = Activity('Waiting', parent='root')
	for remaining in irange(timeout, 0, -1):
		if abort():
			return False
		if (remaining == timeout) or (remaining < 5) or (remaining % 5 == 0):
			activity.update('Waiting for %d seconds' % remaining)
		time.sleep(1)
	activity.finish()
	return True
Example #11
 def getEntries(self, path, metadata, events, seList, objStore):
     metadata['GC_SOURCE_DIR'] = self._path
     counter = 0
     activity = Activity('Reading source directory')
     for fn in self._iter_path():
         activity.update('Reading source directory - [%d]' % counter)
         yield (os.path.join(self._path, fn.strip()), metadata, events,
                seList, objStore)
         counter += 1
     activity.finish()
Example #12
 def _readJobs(self, jobLimit):
     jobMap = {}
     maxJobs = 0
     if os.path.exists(self._dbFile):
         try:
             tar = zipfile.ZipFile(self._dbFile, 'r', zipfile.ZIP_DEFLATED)
         except Exception:  # Try to recover job archive
             self._log.warning(
                 '=' * 40 +
                 '\nStarting recovery of broken job database => Answer "y" if asked "Is this a single-disk archive?"!\n'
                 + '=' * 40)
             os.system('zip -FF %s --out %s.tmp 2> /dev/null' %
                       (self._dbFile, self._dbFile))
             os.rename(self._dbFile, self._dbFile + '.broken')
             os.rename(self._dbFile + '.tmp', self._dbFile)
             tar = zipfile.ZipFile(self._dbFile, 'r', zipfile.ZIP_DEFLATED)
             removeFiles([self._dbFile + '.broken'])
             brokenList = []
             for idx, fnTarInfo in enumerate(tar.namelist()):
                 (jobNum, tid) = tuple(
                     imap(lambda s: int(s[1:]), fnTarInfo.split('_', 1)))
                 try:
                     fp = tar.open(fnTarInfo)
                     try:
                         fp.read()
                     finally:
                         fp.close()
                 except Exception:
                     clear_current_exception()
                     brokenList.append(fnTarInfo)  # remember unreadable entry for removal below
             for broken in brokenList:
                 os.system('zip %s -d %s' % (self._dbFile, broken))
             self._log.info('Recover completed!')
         activity = Activity('Reading job transactions')
         maxJobs = len(tar.namelist())
         tMap = {}
         for idx, fnTarInfo in enumerate(tar.namelist()):
             (jobNum, tid) = tuple(
                 imap(lambda s: int(s[1:]), fnTarInfo.split('_', 1)))
             if tid < tMap.get(jobNum, 0):
                 continue
             try:
                 data = self._fmt.parse(tar.open(fnTarInfo).read())
             except Exception:
                 continue
             jobMap[jobNum] = self._create_job_obj(fnTarInfo, data)
             tMap[jobNum] = tid
             if idx % 100 == 0:
                 activity.update('Reading job transactions %d [%d%%]' %
                                 (idx, (100.0 * idx) / maxJobs))
         activity.finish()
     self._serial = maxJobs
     return jobMap
Example #13
 def getEntries(self, path, metadata, events, seList, objStore):
     activity = Activity('Reading job logs')
     for jobNum in self._selected:
         activity.update('Reading job logs - [%d / %d]' %
                         (jobNum, self._selected[-1]))
         metadata['GC_JOBNUM'] = jobNum
         objStore.update({
             'GC_TASK': self._extTask,
             'GC_WORKDIR': self._extWorkDir
         })
         yield (os.path.join(self._extWorkDir, 'output', 'job_%d' % jobNum),
                metadata, events, seList, objStore)
     activity.finish()
Example #14
	def _readJobs(self, jobLimit):
		jobMap = {}
		maxJobs = 0
		if os.path.exists(self._dbFile):
			try:
				tar = zipfile.ZipFile(self._dbFile, 'r', zipfile.ZIP_DEFLATED)
			except Exception: # Try to recover job archive
				self._log.warning('=' * 40 + '\nStarting recovery of broken job database => Answer "y" if asked "Is this a single-disk archive?"!\n' + '=' * 40)
				os.system('zip -FF %s --out %s.tmp 2> /dev/null' % (self._dbFile, self._dbFile))
				os.rename(self._dbFile, self._dbFile + '.broken')
				os.rename(self._dbFile + '.tmp', self._dbFile)
				tar = zipfile.ZipFile(self._dbFile, 'r', zipfile.ZIP_DEFLATED)
				removeFiles([self._dbFile + '.broken'])
				brokenList = []
				for idx, fnTarInfo in enumerate(tar.namelist()):
					(jobNum, tid) = tuple(imap(lambda s: int(s[1:]), fnTarInfo.split('_', 1)))
					try:
						fp = tar.open(fnTarInfo)
						try:
							fp.read()
						finally:
							fp.close()
					except Exception:
						clear_current_exception()
						brokenList.append(fnTarInfo)  # remember unreadable entry for removal below
				for broken in brokenList:
					os.system('zip %s -d %s' % (self._dbFile, broken))
				self._log.info('Recover completed!')
			activity = Activity('Reading job transactions')
			maxJobs = len(tar.namelist())
			tMap = {}
			for idx, fnTarInfo in enumerate(tar.namelist()):
				(jobNum, tid) = tuple(imap(lambda s: int(s[1:]), fnTarInfo.split('_', 1)))
				if tid < tMap.get(jobNum, 0):
					continue
				try:
					data = self._fmt.parse(tar.open(fnTarInfo).read())
				except Exception:
					continue
				jobMap[jobNum] = self._create_job_obj(fnTarInfo, data)
				tMap[jobNum] = tid
				if idx % 100 == 0:
					activity.update('Reading job transactions %d [%d%%]' % (idx, (100.0 * idx) / maxJobs))
			activity.finish()
		self._serial = maxJobs
		return jobMap
Example #15
 def getEntries(self, path, metadata, events, seList, objStore):
     allDirs = lfilter(lambda fn: fn.startswith('job_'),
                       os.listdir(self._extOutputDir))
     activity = Activity('Reading job logs')
     for idx, dirName in enumerate(allDirs):
         activity.update('Reading job logs - [%d / %d]' %
                         (idx, len(allDirs)))
         try:
             metadata['GC_JOBNUM'] = int(dirName.split('_')[1])
         except Exception:
             continue
         objStore['GC_WORKDIR'] = self._extWorkDir
         if self._selector and not self._selector(metadata['GC_JOBNUM'],
                                                  None):
             continue
         yield (os.path.join(self._extOutputDir,
                             dirName), metadata, events, seList, objStore)
     activity.finish()
Example #16
	def write(cls, fn, pa):
		fp = ZipFile(fn, 'w')
		try:
			keys = sorted(ifilter(lambda p: not p.untracked, pa.getJobKeys()))
			fp.write('# %s\n' % json.dumps(keys))
			maxN = pa.getMaxJobs()
			if maxN:
				activity = Activity('Writing parameter dump')
				for jobNum in irange(maxN):
					activity.update('Writing parameter dump [%d/%d]' % (jobNum + 1, maxN))
					meta = pa.getJobInfo(jobNum)
					meta_str = str.join('\t', imap(lambda k: json.dumps(meta.get(k, '')), keys))
					if meta.get(ParameterInfo.ACTIVE, True):
						fp.write('%d\t%s\n' % (jobNum, meta_str))
					else:
						fp.write('%d!\t%s\n' % (jobNum, meta_str))
				activity.finish()
		finally:
			fp.close()
Example #17
 def write(cls, fn, pa):
     fp = ZipFile(fn, 'w')
     try:
         keys = sorted(ifilter(lambda p: not p.untracked, pa.getJobKeys()))
         fp.write('# %s\n' % json.dumps(keys))
         maxN = pa.getMaxJobs()
         if maxN:
             activity = Activity('Writing parameter dump')
             for jobNum in irange(maxN):
                 activity.update('Writing parameter dump [%d/%d]' %
                                 (jobNum + 1, maxN))
                 meta = pa.getJobInfo(jobNum)
                 meta_str = str.join(
                     '\t', imap(lambda k: json.dumps(meta.get(k, '')),
                                keys))
                 if meta.get(ParameterInfo.ACTIVE, True):
                     fp.write('%d\t%s\n' % (jobNum, meta_str))
                 else:
                     fp.write('%d!\t%s\n' % (jobNum, meta_str))
             activity.finish()
     finally:
         fp.close()
Example #18
def genTarball(outFile, fileList):
	tar = tarfile.open(outFile, 'w:gz')
	activity = Activity('Generating tarball')
	for (pathAbs, pathRel, pathStatus) in fileList:
		if pathStatus is True: # Existing file
			tar.add(pathAbs, pathRel, recursive = False)
		elif pathStatus is False: # Unverified file - check existence before adding
			if not os.path.exists(pathAbs):
				raise UserError('File %s does not exist!' % pathRel)
			tar.add(pathAbs, pathRel, recursive = False)
		elif pathStatus is None: # Directory
			activity.update('Generating tarball: %s' % pathRel)
		else: # File handle
			info, handle = pathStatus.getTarInfo()
			info.mtime = time.time()
			info.mode = stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP + stat.S_IROTH
			if info.name.endswith('.sh') or info.name.endswith('.py'):
				info.mode += stat.S_IXUSR + stat.S_IXGRP + stat.S_IXOTH
			tar.addfile(info, handle)
			handle.close()
	activity.finish()
	tar.close()
Example #19
def genTarball(outFile, fileList):
    tar = tarfile.open(outFile, 'w:gz')
    activity = Activity('Generating tarball')
    for (pathAbs, pathRel, pathStatus) in fileList:
        if pathStatus is True:  # Existing file
            tar.add(pathAbs, pathRel, recursive=False)
        elif pathStatus is False:  # Unverified file - check existence before adding
            if not os.path.exists(pathAbs):
                raise UserError('File %s does not exist!' % pathRel)
            tar.add(pathAbs, pathRel, recursive=False)
        elif pathStatus is None:  # Directory
            activity.update('Generating tarball: %s' % pathRel)
        else:  # File handle
            info, handle = pathStatus.getTarInfo()
            info.mtime = time.time()
            info.mode = stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP + stat.S_IROTH
            if info.name.endswith('.sh') or info.name.endswith('.py'):
                info.mode += stat.S_IXUSR + stat.S_IXGRP + stat.S_IXOTH
            tar.addfile(info, handle)
            handle.close()
    activity.finish()
    tar.close()
Example #20
	def _saveStateToTar(self, tar, meta, source, sourceLen, message):
		# Write the splitting info grouped into subtarfiles
		activity = Activity(message)
		(jobNum, subTar) = (-1, None)
		for jobNum, entry in enumerate(source):
			if jobNum % 100 == 0:
				self._closeSubTar(tar, subTar)
				subTar = self._createSubTar('%03dXX.tgz' % int(jobNum / 100))
				activity.update('%s [%d / %d]' % (message, jobNum, sourceLen))
			# Determine shortest way to store file list
			tmp = entry.pop(DataSplitter.FileList)
			savelist = self._getReducedFileList(entry, tmp) # can modify entry
			# Write files with infos / filelist
			for name, data in [('list', str.join('\n', savelist)), ('info', self._fmt.format(entry, fkt = self._formatFileEntry))]:
				self._addToSubTar(subTar, os.path.join('%05d' % jobNum, name), data)
			# Remove common prefix from info
			if DataSplitter.CommonPrefix in entry:
				entry.pop(DataSplitter.CommonPrefix)
			entry[DataSplitter.FileList] = tmp
		self._closeSubTar(tar, subTar)
		# Write metadata to allow reconstruction of data splitter
		meta['MaxJobs'] = jobNum + 1
		self._addToTar(tar, 'Metadata', self._fmt.format(meta))
		activity.finish()