def files_to_be_fetched(self, normalize=True):
    """Yield the local payload paths listed in the bag's fetch.txt."""
    for _url, _size, path in self.fetch_entries():
        path = urlunquote(path)
        if normalize:
            yield os.path.normpath(path)
        else:
            yield path
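# A minimal, self-contained sketch of how files_to_be_fetched() behaves. The
# FakeBag class and its hard-coded fetch entries are hypothetical stand-ins for
# a real bag whose fetch_entries() yields (url, length, filename) tuples; the
# urlunquote helper is assumed to map to the standard library's unquote.
import os
from urllib.parse import unquote as urlunquote

class FakeBag:
    def fetch_entries(self):
        # (url, length, filename) rows, as they would be parsed from a fetch.txt
        yield ("https://example.org/a%20b.txt", 4, "data/./a%20b.txt")
        yield ("https://example.org/c.bin", 10, "data/c.bin")

    def files_to_be_fetched(self, normalize=True):
        for _url, _size, path in self.fetch_entries():
            path = urlunquote(path)
            yield os.path.normpath(path) if normalize else path

# Normalization percent-decodes and collapses "." segments:
# prints "data/a b.txt" then "data/c.bin"
for p in FakeBag().files_to_be_fetched():
    print(p)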
def fetch_bag_files(bag, keychain_file=DEFAULT_KEYCHAIN_FILE, config_file=DEFAULT_CONFIG_FILE,
                    force=False, callback=None, filter_expr=None, **kwargs):
    """Fetch every remote file listed in the bag's fetch.txt; return True on success."""
    auth = read_keychain(keychain_file)
    config = read_config(config_file)
    cookies = get_request_cookies(config) if kwargs.get("cookie_scan", True) else None
    success = True
    current = 0
    # The total is only needed for progress reporting, so compute it only when
    # a callback was supplied.
    total = 0 if not callback else len(set(bag.files_to_be_fetched()))
    start = datetime.datetime.now()
    for entry in map(FetchEntry._make, bag.fetch_entries()):
        filename = urlunquote(entry.filename)
        if filter_expr and not filter_dict(filter_expr, entry._asdict()):
            continue
        output_path = os.path.normpath(os.path.join(bag.path, filename))
        local_size = os.path.getsize(output_path) if os.path.exists(output_path) else None
        try:
            remote_size = int(entry.length)
        except ValueError:
            remote_size = None
        # A file counts as present when it exists locally and its size matches the
        # length recorded in fetch.txt (or that length is unparsable).
        missing = True
        if local_size is not None and (local_size == remote_size or remote_size is None):
            missing = False
        if not force and not missing:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("Not fetching already present file: %s" % output_path)
        else:
            result_path = fetch_file(entry.url, output_path, auth, size=entry.length,
                                     config=config, cookies=cookies, **kwargs)
            if not result_path:
                success = False
        if callback:
            current += 1
            # A callback returning a falsy value cancels the remaining fetches.
            if not callback(current, total):
                logger.warning("Fetch cancelled by user...")
                success = False
                break
    elapsed = datetime.datetime.now() - start
    logger.info("Fetch complete. Elapsed time: %s" % elapsed)
    cleanup_transports()
    return success
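# fetch_bag_files() assumes FetchEntry is a namedtuple over the three columns of
# a fetch.txt row; a definition consistent with the attribute accesses above
# (entry.url, entry.length, entry.filename) would be:
from collections import namedtuple

FetchEntry = namedtuple("FetchEntry", ["url", "length", "filename"])

# A hedged sketch of the progress-callback contract implied above: the callback
# receives (current, total) and returns True to continue or False to cancel.
# The `bag` object and the report_progress name below are illustrative only.
def report_progress(current, total):
    print("fetched %d of %d files" % (current, total))
    return True  # returning False would cancel the remaining fetches

# success = fetch_bag_files(bag, callback=report_progress)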
def ensure_valid_output_path(url, output_path=None):
    """Derive an absolute output path (from the URL if none is given) and create its directory."""
    if not output_path:
        # Default to the percent-decoded final path segment of the URL, placed
        # in the current directory.
        upr = urlsplit(url, allow_fragments=False)
        output_path = os.path.join(os.curdir, urlunquote(os.path.basename(upr.path)))
    output_path = os.path.abspath(output_path)
    output_dir = os.path.dirname(output_path)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    return output_path
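# A small usage sketch for ensure_valid_output_path(), assuming the module-level
# urlsplit/urlunquote helpers map to the standard library as imported below.
# With no explicit output_path, the file name is taken from the URL's last path
# segment, percent-decoded, and resolved under the current directory.
import os
from urllib.parse import urlsplit, unquote as urlunquote

path = ensure_valid_output_path("https://example.org/files/report%202020.pdf")
print(path)  # e.g. /current/working/dir/report 2020.pdf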