Example #1
import random
import traceback

# logger, warn_long_time, friendly_path, get_all_log_files, LogsFormat,
# and BootStream are assumed to be provided by the surrounding package.


def index_directory(directory, ignore_cache=False, warn_if_longer=3):
    ''' Returns a dict mapping filename -> list of streams. '''
    file2streams = {}
    # logger.debug('Indexing directory %r (ignore cache: %s).' % 
    #             (friendly_path(directory), ignore_cache))
    
    with warn_long_time(warn_if_longer, 'indexing directory %r' % 
                                        friendly_path(directory)):
        files = get_all_log_files(directory)
    
    # Shuffle the list so that multiple threads will index different files
    random.seed()
    random.shuffle(files)

    with warn_long_time(warn_if_longer, 'indexing %d files (use cache: %s)' % 
                        (len(files), not ignore_cache)):
        for filename in files:
            reader = LogsFormat.get_reader_for(filename)
            try:
                file2streams[filename] = \
                    reader.index_file_cached(filename, ignore_cache=ignore_cache)
                for stream in file2streams[filename]:
                    assert isinstance(stream, BootStream)
                if not file2streams[filename]:
                    logger.warning('No streams found in file %r.' % 
                                   friendly_path(filename))
            except Exception:
                logger.error('Invalid data in file %r.' % friendly_path(filename))
                logger.error(traceback.format_exc())

    return file2streams
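
The snippets in this file lean on a warn_long_time context manager that logs a warning when the wrapped block takes longer than a threshold. Its real definition lives elsewhere in the package; a minimal sketch of the assumed behavior (the name and the moreinfo dict are inferred from the call sites above and in Example #2, not taken from the original source) might look like:

import logging
import time
from contextlib import contextmanager

logger = logging.getLogger(__name__)

@contextmanager
def warn_long_time(max_seconds, description):
    """ Logs a warning if the body of the `with` block takes longer than
        max_seconds. Yields a dict the caller can fill with extra info
        (e.g. 'size') to include in the warning. """
    moreinfo = {}
    t0 = time.time()
    try:
        yield moreinfo
    finally:
        elapsed = time.time() - t0
        if elapsed > max_seconds:
            logger.warning('%s took %.1f s (threshold: %.1f s) %s' %
                           (description, elapsed, max_seconds, moreinfo))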
Example #2
    def set(self, key, value):  # @ReservedAssignment
        """ Stores the value under the key; returns a dictionary with some statistics. """
        if not StorageFilesystem.checked_existence:
            StorageFilesystem.checked_existence = True
            if not os.path.exists(self.basepath):
                os.makedirs(self.basepath)

        # TODO: generalize this
        filename = self.filename_for_key(key)

        with warn_long_time(self.warn_long_time,
                            'dumping %r' % key) as moreinfo:
            protocol = HIGHEST_PROTOCOL
            paranoid = False
            if paranoid:        
                safe_pickle_dump(value, filename, protocol)
            else:
                with open(filename, 'wb') as f:
                    pickle.dump(value, f, protocol)
                
            moreinfo['size'] = os.stat(filename).st_size
            
        # TODO: remove this
        stats = {}
        stats['duration'] = 0  # XXX
        stats['clock'] = 0  # XXX
        stats['size'] = os.stat(filename).st_size
        return stats
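
The existence check in set() is guarded only by a per-process class attribute, so two processes starting at the same time can both find basepath missing, and the loser's os.makedirs call raises. On Python 3 the usual fix is exist_ok=True; a minimal sketch of that guard (ensure_dir_exists is a hypothetical helper name, not part of the original class):

import os

def ensure_dir_exists(path):
    """ Creates the directory if missing; concurrent creation by
        another process is not an error. """
    os.makedirs(path, exist_ok=True)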
Example #3
    def get(self, key):
        if not self.exists(key):
            raise Exception('Could not find key %r.' % key)
        
        filename = self.filename_for_key(key)
        try:
            with warn_long_time(self.warn_long_time, 'reading %r' % key):  
                return safe_pickle_load(filename)

        except Exception as e:
            msg = "Could not unpickle file %r: %s" % (filename, e)
            logger.error(msg)
            raise
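
Both methods above delegate to safe_pickle_dump / safe_pickle_load, whose definitions are not shown. A common way to implement such "safe" pickling is write-to-temp-then-rename, which avoids leaving a truncated pickle behind if the process dies mid-write; a sketch under that assumption (the actual implementation in the package may differ):

import os
import pickle

def safe_pickle_dump(value, filename, protocol=pickle.HIGHEST_PROTOCOL):
    """ Dumps to a temporary file and renames it into place, so a crash
        mid-write cannot leave a truncated pickle at filename. """
    tmp = '%s.tmp' % filename
    with open(tmp, 'wb') as f:
        pickle.dump(value, f, protocol)
    os.rename(tmp, filename)  # atomic on POSIX within one filesystem

def safe_pickle_load(filename):
    """ Plain load; the safety is all on the writing side. """
    with open(filename, 'rb') as f:
        return pickle.load(f)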
Example #4
    def index_file_cached(self, filename, ignore_cache=False):
        cache = '%s.index_cache' % filename
        if os.path.exists(cache) and not ignore_cache:  # TODO: mtime
            try:
                return safe_pickle_load(cache)
            except Exception as e:
                msg = 'Could not unpickle cache %r, deleting.' % friendly_path(cache)
                msg += '\n%s' % e
                logger.warning(msg)
                try:
                    os.unlink(cache)
                except OSError:
                    pass
        logger.debug('Indexing file %r' % friendly_path(filename))
        res = self.index_file(filename)
        for stream in res:
            assert isinstance(stream, BootStream)
            
        logger.debug('Now dumping file %r' % friendly_path(cache))
        with warn_long_time(1, 'dumping %r' % friendly_path(cache)):
            safe_pickle_dump(res, cache, protocol=2)

        return res
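
The TODO at the top of index_file_cached notes that the cache is never invalidated when the log file changes. A straightforward fix, sketched here under the assumption that the cache should simply be rebuilt whenever the source is newer (cache_is_fresh is a hypothetical helper, not in the original code), compares modification times before trusting the cache:

import os

def cache_is_fresh(filename, cache):
    """ True iff the cache file exists and is at least as new as the
        source log file. """
    return (os.path.exists(cache) and
            os.path.getmtime(cache) >= os.path.getmtime(filename))

# The first check in index_file_cached would then read:
#     if not ignore_cache and cache_is_fresh(filename, cache):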