Пример #1
0
    def process(self):
        """Hash every queued file and return the entries that succeeded.

        Iterates over ``self._queue``, whose items unpack as
        ``(file_path, file_size, dest)``.  ``hashfile`` is called on each
        path; on success its result is merged into ``dest`` with
        ``dest.update`` and ``dest`` is appended to ``self.processed``.
        If hashing raises, the exception is logged and appended to
        ``self.errors`` and the loop moves on to the next item.

        Returns:
            ``self.processed``.
        """
        progress = Progress('hashing')

        __LOG__.debug(
            'hashing %d files (%s)',
            len(self._queue), sizeof_fmt(self.bytes_to_hash))
        progress.start(self, maxval=self.bytes_to_hash)

        for (path, size, target) in self._queue:
            __LOG__.debug(
                'hashing %s (%s)...', quote(path), sizeof_fmt(size))
            try:
                digests = hashfile(path)
            except Exception as failure:
                # One unreadable file must not abort the whole run.
                __LOG__.exception('hashing of %s failed: ', path)
                self.errors.append(failure)
            else:
                target.update(digests)
                self.processed.append(target)

            # Progress is measured in bytes and advances for failed
            # files as well, so the reported value can reach maxval.
            self.bytes_processed += size
            progress.update(self, val=self.bytes_processed)

        progress.finish(self)
        __LOG__.debug('%d files hashed', len(self.processed))
        return self.processed
Пример #2
0
 def duplicates(self, limit=-1):
     """Yield duplicates from the file registry while reporting progress.

     Every item produced by
     ``self.file_registry.find_duplicates(limit=limit)`` is yielded
     unchanged.  After the consumer resumes the generator, the progress
     reporter is updated with the number of items yielded so far.

     Args:
         limit: Forwarded to ``find_duplicates``.  When greater than 0 it
             is also used as the progress ``maxval``; otherwise
             ``maxval`` is ``None``.

     Yields:
         The items produced by ``find_duplicates``.
     """
     progress = Progress('find duplicates')
     progress.start(self, maxval=limit if limit > 0 else None)
     try:
         found = self.file_registry.find_duplicates(limit=limit)
         for index, duplicate in enumerate(found, 1):
             yield duplicate
             progress.update(self, val=index)
     finally:
         # A generator may be closed before it is exhausted (the consumer
         # breaks out of its loop) or the registry query may raise; in
         # both cases the progress reporter still has to be finished.
         progress.finish(self)
Пример #3
0
 def _step0_scan_db(self):
     """Compare every registry entry with the file it refers to on disk.

     For each entry returned by ``self.file_registry.find_all()`` the
     path is resolved against ``self.base_dir`` and ``os.stat`` is
     called on it:

     * if the stored mtime or size differs from the stat result, the
       entry is updated and queued on ``self.hash_queue``;
     * if the stat call succeeds, the entry's path (passed through
       ``s``) is appended to ``self.visited_files``;
     * if an ``OSError`` with ``errno.ENOENT`` is raised, the entry is
       deleted from the registry.

     Returns:
         ``len(self.visited_files)`` after the scan.

     Raises:
         OSError: re-raised when its errno is anything but ``ENOENT``.
     """
     progress = Progress('scanning db')
     __LOG__.debug('scanning db...')
     total_entries = self.file_registry.count()
     deleted_count = 0
     mtime_changed = 0
     size_changed = 0
     progress.start(self, maxval=total_entries)
     for position, db_file in enumerate(self.file_registry.find_all(), 1):
         progress.update(self, val=position)
         abs_path = join(self.base_dir, db_file.path)
         try:
             file_stat = os.stat(abs_path)
             # Stored mtimes are compared at whole-second resolution.
             mtime_differs = db_file.mtime != int(file_stat.st_mtime)
             size_differs = db_file.size != file_stat.st_size
             if mtime_differs:
                 mtime_changed += 1
             if size_differs:
                 size_changed += 1
             if mtime_differs or size_differs:
                 # Refresh both fields and schedule the file for hashing.
                 db_file.mtime = int(file_stat.st_mtime)
                 db_file.size = file_stat.st_size
                 self.hash_queue.append(abs_path, file_stat.st_size, db_file)
             self.visited_files.append(s(db_file.path))
         except OSError as err:
             if err.errno != errno.ENOENT:
                 raise  # pragma: no cover
             # The file no longer exists: drop its registry entry.
             __LOG__.debug('deleting %s', quote(db_file.path))
             self.file_registry.delete(db_file)
             deleted_count += 1
     progress.finish(self)
     __LOG__.debug('mtime of %d files changed', mtime_changed)
     __LOG__.debug('size of %d files changed', size_changed)
     __LOG__.debug('%d old files deleted', deleted_count)
     return len(self.visited_files)
Пример #4
0
 def _step1_scan_fs(self):
     """Scan ``self.base_dir`` and queue files not yet visited for hashing.

     Every entry produced by
     ``files_of_dir(self.base_dir, self.is_excluded)`` has its path made
     relative to ``self.base_dir``.  If that relative path is not in
     ``self.visited_files``, a new ``model.File`` is built from the
     entry's stats and appended to ``self.hash_queue`` together with the
     entry's path and size.

     Returns:
         The number of new files found.
     """
     progress = Progress('scanning fs')
     __LOG__.debug('scanning %s', quote(self.base_dir))
     new_files_found = 0
     # Snapshot the visited paths into a set once: the loop below never
     # adds to ``self.visited_files``, so this is equivalent to testing
     # against it directly, but each membership test no longer walks
     # the whole collection.
     visited = set(self.visited_files)
     progress.start(self)
     for entry in files_of_dir(self.base_dir, self.is_excluded):
         progress.update(self)
         rel_path = os.path.relpath(entry.path, self.base_dir)
         if rel_path in visited:
             continue
         new_files_found += 1
         db_file = model.File(
             path=u(rel_path),
             mtime=int(entry.stats.st_mtime),
             size=entry.stats.st_size,
         )
         self.hash_queue.append(
             entry.path,
             entry.stats.st_size,
             db_file
         )
     progress.finish(self)
     __LOG__.debug('%d new files found', new_files_found)
     return new_files_found
Пример #5
0
 def init(self):
     """Create the database schema, wrapped in a one-step progress report.

     ``model.create_schema`` is called with whatever
     ``self.db_session.get_bind()`` returns.
     """
     progress = Progress('init')
     __LOG__.debug('initializing...')
     progress.start(self, maxval=1)
     bind = self.db_session.get_bind()
     model.create_schema(bind)
     progress.finish(self)