def index_directory(directory, ignore_cache=False, warn_if_longer=3):
    ''' Returns a hash filename -> list of streams. '''
    file2streams = {}
    # logger.debug('Indexing directory %r (ignore cache: %s).'
    #              % (friendly_path(directory), ignore_cache))

    with warn_long_time(warn_if_longer,
                        'indexing directory %r' % friendly_path(directory)):
        files = get_all_log_files(directory)

    # Shuffle the list so that multiple threads will index different files
    import random
    random.seed()
    random.shuffle(files)

    with warn_long_time(warn_if_longer,
                        'indexing %d files (use cache: %s)'
                        % (len(files), not ignore_cache)):
        for filename in files:
            reader = LogsFormat.get_reader_for(filename)
            try:
                file2streams[filename] = \
                    reader.index_file_cached(filename,
                                             ignore_cache=ignore_cache)
                for stream in file2streams[filename]:
                    assert isinstance(stream, BootStream)
                if not file2streams[filename]:
                    logger.warning('No streams found in file %r.'
                                   % friendly_path(filename))
            except Exception:
                logger.error('Invalid data in file %r.'
                             % friendly_path(filename))
                logger.error(traceback.format_exc())

    return file2streams
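
# A minimal usage sketch (an assumption, not part of the original source):
# index a log directory and report how many streams were found per file.
# The directory path below is a hypothetical example.
def _example_index_logs():
    file2streams = index_directory('/path/to/logs', ignore_cache=False)
    for filename, streams in file2streams.items():
        print('%s: %d streams' % (filename, len(streams)))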
def set(self, key, value):  # @ReservedAssignment
    """ Returns a dictionary with some statistics. """
    if not StorageFilesystem.checked_existence:
        StorageFilesystem.checked_existence = True
        if not os.path.exists(self.basepath):
            os.makedirs(self.basepath)

    # TODO: generalize this
    filename = self.filename_for_key(key)

    with warn_long_time(self.warn_long_time, 'dumping %r' % key) as moreinfo:
        protocol = HIGHEST_PROTOCOL
        paranoid = False
        if paranoid:
            safe_pickle_dump(value, filename, protocol)
        else:
            with open(filename, 'wb') as f:
                pickle.dump(value, f, protocol)
        moreinfo['size'] = os.stat(filename).st_size

    # TODO: remove this
    stats = {}
    stats['duration'] = 0  # XXX
    stats['clock'] = 0  # XXX
    stats['size'] = os.stat(filename).st_size
    return stats
def get(self, key):
    if not self.exists(key):
        raise Exception('Could not find key %r.' % key)
    filename = self.filename_for_key(key)
    try:
        with warn_long_time(self.warn_long_time, 'reading %r' % key):
            return safe_pickle_load(filename)
    except Exception as e:
        msg = 'Could not unpickle file %r: %s' % (filename, e)
        logger.error(msg)
        raise
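
# Usage sketch (an assumption, not from the original source): round-trip a
# value through the filesystem-backed store. The constructor signature of
# StorageFilesystem and the base path shown here are hypothetical; adjust to
# the real API.
def _example_storage_roundtrip():
    db = StorageFilesystem('/tmp/storage-example')
    stats = db.set('result', {'answer': 42})
    print('wrote %d bytes' % stats['size'])
    print(db.get('result'))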
def index_file_cached(self, filename, ignore_cache=False):
    cache = '%s.index_cache' % filename
    if os.path.exists(cache) and not ignore_cache:  # TODO: check mtime
        try:
            return safe_pickle_load(cache)
        except Exception as e:
            msg = ('Could not unpickle cache %r, deleting.'
                   % friendly_path(cache))
            msg += '\n%s' % e
            logger.warning(msg)
            try:
                os.unlink(cache)
            except OSError:
                pass

    logger.debug('Indexing file %r' % friendly_path(filename))
    res = self.index_file(filename)
    for stream in res:
        assert isinstance(stream, BootStream)

    logger.debug('Now dumping file %r' % friendly_path(cache))
    with warn_long_time(1, 'dumping %r' % friendly_path(cache)):
        safe_pickle_dump(res, cache, protocol=2)
    return res