def load(self, pbar_enabled):
    """Load the Library Genesis index from ``self.filename`` (CSV).

    Parameters:
        pbar_enabled: whether to display a console progress bar.

    Returns:
        dict mapping lowercased md5 hash -> (filename, filesize as int).
    """
    # 'with' guarantees the file is closed even if parsing raises
    # (the original leaked the handle on error).
    with open(self.filename) as fobj:
        # Count lines in O(1) memory; readlines() would build a list of
        # the whole file just to take its length.
        line_count = sum(1 for _ in fobj)
        pbar = ProgressBar(maxval=line_count, enabled=pbar_enabled)
        fobj.seek(0)
        reader = csv.DictReader(fobj, fieldnames=self.fieldnames)
        # Peek at the first row: if it is a header it maps each field
        # name to itself (case-insensitively) and stays consumed.
        header = next(reader)  # next() works on Python 2.6+ and 3
        for fieldname in header:
            if fieldname != header[fieldname].lower():
                # First row is data, not a header: rewind so the main
                # loop below re-reads it as an entry.
                fobj.seek(0)
                break
        library = {}
        for entry in reader:
            library[entry['md5'].lower()] = (entry['filename'],
                                             int(entry['filesize']))
            # Refresh the bar only every N rows to keep overhead low.
            if reader.line_num % PROGRESSBAR_UPDATE_INTERVAL == 0:
                pbar.set(reader.line_num)
        pbar.finish()
    return library
def main():
    """Program entry point: parse CLI options, load the Library Genesis
    index (from DB or CSV), then walk the source directory and process
    every file whose size and md5 match a library entry.

    Returns 0 on success; on a usage/environment problem returns the
    result of error(...) (or exits via oparser.error).
    """
    global config, log
    oparser = optparse.OptionParser(
        usage='%prog [options] <source> <destination>',
        version=APP_VERSION_STRING, prog=APP_SHORT_NAME)
    oparser.add_option('-n', '--dry-run', action='store_true',
        dest='dry_run', default=False,
        help="don't perform write actions, just simulate")
    oparser.add_option('-v', '--verbose', action='store_true',
        dest='verbose', default=False, help="show operations log")
    oparser.add_option('', '--no-progressbar', action='store_false',
        dest='pbar', default=True, help="don't show progress bar")
    optgroup = optparse.OptionGroup(oparser, 'File handling options')
    optgroup.add_option('-m', '--method', dest='method', default=M_COPY,
        help='file processing method ({0})'.format('|'.join(
            config.methods)))
    optgroup.add_option('-r', '--remove-empty', action='store_true',
        dest='remove_empty', default=False,
        help='remove empty directories')
    optgroup.add_option('', '--remove-duplicates', action='store_true',
        dest='remove_duplicates', default=False,
        help='remove files that already exist in repository')
    oparser.add_option_group(optgroup)
    optgroup = optparse.OptionGroup(oparser, 'CSV options')
    optgroup.add_option('', '--csv', dest='csv', metavar='FILENAME',
        default='libgen.csv', help='path to csv (%default)')
    oparser.add_option_group(optgroup)
    optgroup = optparse.OptionGroup(oparser, "DB connection options")
    optgroup.add_option('', '--db-host', default='localhost',
        help='DB host (%default)')
    optgroup.add_option('', '--db-name', default='bookwarrior',
        help='DB name (%default)')
    optgroup.add_option('', '--db-user', help='DB user')
    optgroup.add_option('', '--db-passwd', metavar='PASSWD', default='',
        help='DB password (empty)')
    oparser.add_option_group(optgroup)
    (options, args) = oparser.parse_args()
    # Exactly two positional arguments: <source> <destination>.
    if len(args) != 2:
        oparser.error('Wrong number of arguments')
    if options.method not in config.methods:
        oparser.error(u'Unknown file processing method "{0}"'.format(
            options.method))
    # A method mapped to None is declared but unavailable on this system.
    if config.methods[options.method] is None:
        return error(config.get_error_message(options.method))
    # Python 2: decode byte paths to unicode using the configured encoding.
    config.src, config.dst = (os.path.abspath(arg).decode(config.encoding)
        for arg in args)
    # Validate that both directories exist and are accessible with the
    # permissions the chosen options require.
    if not os.path.isdir(config.src):
        return error(u'Directory {0} not found'.format(config.src))
    if not os.path.isdir(config.dst):
        return error(u'Directory {0} not found'.format(config.dst))
    if not os.access(config.src, os.R_OK):
        return error(u'Not enough rights for reading from %s' % config.src)
    # Source must be writable only when files/dirs will be removed or moved.
    if ((options.remove_empty or options.remove_duplicates or
            options.method == M_MOVE) and
            not os.access(config.src, os.W_OK)):
        return error(u'Not enough rights for writing to %s' % config.src)
    if not os.access(config.dst, os.W_OK):
        return error(u'Not enough rights for writing to %s' % config.dst)
    # Check whether the file system supports creating links:
    # on Windows, soft and hard links can only be created on NTFS
    # (hard links only within a single drive).
    if config.windows and options.method in (M_SYMLINK, M_HARDLINK):
        message = config.checkfs(options.method)
        if message:
            return error(message)
    # Pick the index source: a database if credentials were given,
    # otherwise the CSV dump.
    if options.db_user:
        worker = loader.DBLoader(options.db_host, options.db_name,
            options.db_user, options.db_passwd)
    else:
        if not os.path.isfile(options.csv):
            return error(u'File {0} not found'.format(options.csv))
        worker = loader.CSVLoader(options.csv)
    print('Loading Library Genesis...')
    library = worker.load(options.pbar)
    # Set of all known file sizes: a cheap pre-filter so md5 is computed
    # only for files whose size appears in the library at all.
    library_filesizes = set(value[1] for value in library.values())
    print('{0} books loaded'.format(len(library)))
    print('Analyzing total size of files for processing...', end=' ')
    src_size = dirsize(config.src)
    print(bytes_to_human(src_size))
    print('Scanning...')
    processed, added, duplicate = ProgressCounter(), ProgressCounter(
        ), ProgressCounter()
    pbar = ProgressBar(maxval=src_size, displaysize=True,
        enabled=options.pbar)
    log.set_pbar(pbar)
    # Minimum number of processed bytes between progress-bar refreshes.
    # NOTE(review): integer division under Python 2 — intentional here.
    delta = src_size / CHECK_PROGRESS_DIVIDER
    for path, dirs, files in os.walk(config.src):
        for file in files:
            fullpath = os.path.join(path, file)
            filesize = os.path.getsize(fullpath)
            # if the library has a file of this size
            if filesize in library_filesizes:
                md5 = md5hash(fullpath)
                # and the hash matches
                if md5 in library:
                    # then process it
                    already_in_repo = process(fullpath, library[md5][0],
                        options)
                    if already_in_repo:
                        duplicate.add(filesize)
                    else:
                        added.add(filesize)
            # every visited file counts toward overall progress
            processed.add(filesize)
            # refresh the bar only once enough bytes have accumulated
            if processed.size - pbar.curval >= delta:
                pbar.set(processed.size)
        # optionally prune directories left empty (e.g. after moves)
        if not options.dry_run and options.remove_empty and \
                dirsize(path) == 0:
            shutil.rmtree(path)
    pbar.finish()
    log.unset_pbar()
    # Final summary: totals for all visited files, files added to the
    # repository, and duplicates found (or removed).
    print('Processed: {0} ({1})'.format(processed.count,
        bytes_to_human(processed.size)))
    print('Added to repository ({0}): {1} ({2})'.format(
        config.method_descriptions[options.method], added.count,
        bytes_to_human(added.size)))
    print('Duplicates {0}: {1} ({2})'.format(
        'removed' if options.remove_duplicates else 'found',
        duplicate.count, bytes_to_human(duplicate.size)))
    return 0