def external_file(*urls, filename=None, directory=None, copy_file_url=False,
                  name=None):
    """
    Downloads a file from the first valid URL and saves it into *directory*
    under the specified *filename*.

    :param urls: One or more URLs. Supports ``http://``, ``https://``,
        ``ftp://`` and ``file://``. Note that if a ``file://`` URL is
        specified, the file is not copied to the output filename unless
        *copy_file_url* is True.
    :param filename: The output filename of the downloaded file. Defaults
        to the filename of the downloaded file.
    :param directory: The directory to save the file to. If *filename*
        is a relative path, it will be joined with this directory. Defaults
        to a path in the build directory.
    :param copy_file_url: If True, ``file://`` URLs will be copied instead
        of used as-is.
    :param name: The name of the loader action. This name is used to store
        information in the :attr:`Session.cache` so we can re-use existing
        downloaded data. :func:`~craftr.defaults.gtn` will be used to
        retrieve the default value for this parameter.
    :raise NoExternalFileMatch: If no URL could be downloaded; carries the
        per-URL exceptions collected along the way.
    :return: The path to the downloaded file.
    """

    name = gtn(name)
    if not directory and not filename:
        directory = buildlocal('data')
    cache = get_loader_cache(name)

    # TODO: expand variables of the current module.

    target_filename = None
    exceptions = []
    for url in urls:
        # Re-use a previous download of the same URL if the file still exists.
        if url == cache.get('download_url'):
            existing_file = cache.get('download_file')
            if existing_file and path.isfile(existing_file):
                return existing_file

        progress_info = 'Downloading {} ...'.format(url)
        if url.startswith('file://'):
            source_file = url[7:]
            if path.isfile(source_file):
                if not copy_file_url:
                    # Use the local file in place; nothing is cached.
                    return source_file
                if not filename:
                    filename = path.basename(source_file)

                # TODO: Use httputils.download_file() for this as well?
                logger.progress_begin(progress_info)
                # try/finally mirrors the download branch below: without it,
                # an error during makedirs/open/copy would leave the logger's
                # progress state open.
                try:
                    path.makedirs(directory)
                    target_filename = path.join(directory, filename)
                    with open(source_file, 'rb') as sfp:
                        with open(target_filename, 'wb') as dfp:
                            for bytes_copied, size in pyutils.copyfileobj(sfp, dfp):
                                logger.progress_update(float(bytes_copied) / size)
                finally:
                    logger.progress_end()

                # TODO: Copy file permissions
                break
            else:
                exceptions.append(FileNotFoundError(url))
        else:
            progress = lambda data: _external_file_download_callback(
                progress_info, directory, filename, cache, data)
            try:
                # Second result ("reused" flag) is intentionally unused.
                target_filename, _ = httputils.download_file(
                    url, filename=filename, directory=directory,
                    on_exists='skip', progress=progress)
            except (httputils.URLError, httputils.HTTPError) as exc:
                exceptions.append(exc)
            else:
                break
            finally:
                logger.progress_end()

    if target_filename:
        # Record the successful URL so later calls can skip the download.
        cache['download_url'] = url
        cache['download_file'] = target_filename
        return target_filename

    raise NoExternalFileMatch(name, urls, exceptions)
def external_archive(*urls, exclude_files=(), directory=None, name=None):
    """
    Downloads an archive from the first valid URL and unpacks it into
    *directory*. Archives with a single directory at the root will be
    extracted from one level below, eliminating that single parent directory.
    *exclude_files* can be a list of glob patterns that will be matched
    against the arcnames in the archive. Note that to exclude a directory,
    a pattern must match all files in that directory.

    Uses :func:`external_file` to download the archive.

    :param urls: See :func:`external_file`
    :param exclude_files: A list of glob patterns.
    :param directory: The directory to unpack the archive to. Defaults to
        a directory on the build directory derived from the downloaded
        archive filename. If defined and followed by a trailing slash, the
        archive filename will be appended.
    :param name: The name of the loader action. This name is used to store
        information in the :attr:`Session.cache` so we can re-use existing
        downloaded data. :func:`~craftr.defaults.gtn` will be used to
        retrieve the default value for this parameter.
    :return: The path to the top-level directory of the unpacked archive.
    """

    name = gtn(name)
    if not directory:
        directory = buildlocal('data') + '/'

    archive = external_file(*urls, directory=directory, name=name)
    cache = get_loader_cache(name)  # shared with external_file()
    suffix = nr.misc.archive.get_opener(archive)[0]

    # A trailing-slash directory means: append the archive's base name
    # (minus the archive suffix) as the unpack target.
    if path.maybedir(directory):
        base_name = path.basename(archive)[:-len(suffix)]
        directory = path.join(directory, base_name)

    # Skip unpacking when the cache says this exact archive was already
    # extracted into this exact directory and it still exists.
    already_unpacked = (
        cache.get('archive_source') == archive
        and cache.get('archive_dir') == directory
        and path.isdir(directory))
    if already_unpacked:
        return directory

    def _should_extract(arcname):
        # Keep the member unless any exclude pattern matches it.
        return not any(fnmatch.fnmatch(arcname, p) for p in exclude_files)

    def _report_progress(index, count, filename):
        # index == -1 signals the pre-extraction indexing phase.
        if index == -1:
            logger.progress_begin("Unpacking {} ...".format(path.basename(archive)))
            logger.progress_update(0.0, 'Reading index...')
            return
        fraction = index / float(count)
        if index == 0:
            # Indexing done; switch to the extraction progress display.
            logger.progress_end()
            logger.progress_begin(None, False)
        elif index == (count - 1):
            logger.progress_end()
        else:
            logger.progress_update(fraction, '{} / {}'.format(index, count))

    nr.misc.archive.extract(archive, directory, suffix=suffix,
                            unpack_single_dir=True,
                            check_extract_file=_should_extract,
                            progress_callback=_report_progress)

    # Remember this extraction so repeat calls can return early.
    cache['archive_source'] = archive
    cache['archive_dir'] = directory
    return directory