def grab_file_url(file_url, appname='utool', download_dir=None, delay=None,
                  spoof=False, fname=None, verbose=True, redownload=False,
                  check_hash=False):
    r"""
    Downloads a file and returns the local path of the file.

    The resulting file is cached, so multiple calls to this function do not
    result in multiple downloads.

    Args:
        file_url (str): url to the file
        appname (str): (default = 'utool')
        download_dir (str): custom download directory (default = None)
        delay (float): seconds to wait before download (default = None)
        spoof (bool): (default = False)
        fname (str): custom file name (default = None)
        verbose (bool): verbosity flag (default = True)
        redownload (bool): if True forces redownload of the file
            (default = False)
        check_hash (bool or iterable): if True, defaults to checking 4 hashes
            (in order): custom, md5, sha1, sha256. These hashes are checked
            for remote copies and, if found, will check the local file. You
            may also specify a list of hashes to check, for example
            ['md5', 'sha256'] in the specified order. The first verified hash
            to be found is used (default = False)

    Returns:
        str: fpath - file path string

    CommandLine:
        python -m utool.util_grabdata --test-grab_file_url:0
        python -m utool.util_grabdata --test-grab_file_url:1

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_grabdata import *  # NOQA
        >>> import utool as ut  # NOQA
        >>> from os.path import basename
        >>> ut.exec_funckw(ut.grab_file_url, locals())
        >>> file_url = 'http://i.imgur.com/JGrqMnV.png'
        >>> redownload = True
        >>> fname = 'lena.png'
        >>> lena_fpath = ut.grab_file_url(file_url, fname=fname,
        >>>                               redownload=redownload)
        >>> result = basename(lena_fpath)
        >>> print(result)
        lena.png

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_grabdata import *  # NOQA
        >>> import utool as ut  # NOQA
        >>> ut.exec_funckw(ut.grab_file_url, locals())
        >>> file_url = 'https://lev.cs.rpi.edu/public/models/detect.yolo.12.classes'
        >>> fname = 'detect.yolo.12.classes'
        >>> check_hash = True
        >>> fpath = ut.grab_file_url(file_url, fname=fname, check_hash=check_hash)
    """
    file_url = clean_dropbox_link(file_url)
    if fname is None:
        fname = basename(file_url)
    # Download the file into the app resource dir by default
    if download_dir is None:
        download_dir = util_cplat.get_app_resource_dir(appname)
    # The file will be cached at:
    fpath = join(download_dir, fname)
    # If check hash, get remote hash and assert local copy is the same
    if check_hash:
        if isinstance(check_hash, (list, tuple)):
            hash_list = check_hash
        else:
            hash_list = ['md5']
            # hash_list = ['sha1.custom', 'md5', 'sha1', 'sha256']
        # Get the expected remote file hash
        hash_remote, hash_tag_remote = grab_file_remote_hash(file_url, hash_list,
                                                             verbose=verbose)
        hash_list = [hash_tag_remote]
        # We have a valid candidate hash from remote, check for same hash locally
        hash_local, hash_tag_local = get_file_local_hash(fpath, hash_list,
                                                         verbose=verbose)
        if verbose:
            print('[utool] Pre Local Hash:  %r' % (hash_local, ))
            print('[utool] Pre Remote Hash: %r' % (hash_remote, ))
        # Check all 4 hash conditions
        if hash_remote is None:
            # No remote hash provided, turn off post-download hash check
            check_hash = False
        elif hash_local is None:
            if verbose:
                print('[utool] Remote hash provided but local hash missing, redownloading.')
            redownload = True
        elif hash_local == hash_remote:
            assert hash_tag_local == hash_tag_remote, 'hash tag disagreement'
        else:
            if verbose:
                print('[utool] Both hashes provided, but they disagree, redownloading.')
            redownload = True
    # Download
    util_path.ensurepath(download_dir)
    if redownload or not exists(fpath):
        # Download the file
        if verbose:
            print('[utool] Downloading file %s' % fpath)
        if delay is not None:
            print('[utool] delay download by %r seconds' % (delay,))
            time.sleep(delay)
        download_url(file_url, fpath, spoof=spoof)
    else:
        if verbose:
            print('[utool] Already have file %s' % fpath)
    util_path.assert_exists(fpath)
    # Post-download local hash verification
    if check_hash:
        # File has been successfully downloaded, write remote hash to local hash file
        hash_fpath = '%s.%s' % (fpath, hash_tag_remote, )
        with open(hash_fpath, 'w') as hash_file:
            hash_file.write(hash_remote)
        # For sanity check (custom) and file verification (hashing), get local hash again
        hash_local, hash_tag_local = get_file_local_hash(fpath, hash_list,
                                                         verbose=verbose)
        if verbose:
            print('[utool] Post Local Hash: %r' % (hash_local, ))
        assert hash_local == hash_remote, 'Post-download hash disagreement'
        assert hash_tag_local == hash_tag_remote, 'Post-download hash tag disagreement'
    return fpath
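
# Hedged usage sketch (not part of the original module): shows one way to call
# grab_file_url with an explicit hash list, per the check_hash docstring above.
# The helper name _demo_grab_file_url_with_hash is hypothetical; the URL and
# filename are taken from the doctest above and used purely for illustration.
def _demo_grab_file_url_with_hash():
    import utool as ut
    # Check md5 first, then sha256; the first hash advertised by the remote
    # is used to verify the cached local copy and trigger a redownload on
    # mismatch.
    fpath = ut.grab_file_url(
        'http://i.imgur.com/JGrqMnV.png',
        fname='lena.png',
        check_hash=['md5', 'sha256'],
    )
    return fpath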
def grab_zipped_url(zipped_url, ensure=True, appname='utool',
                    download_dir=None, force_commonprefix=True, cleanup=False,
                    redownload=False, spoof=False):
    r"""
    Downloads and unzips the url.

    Args:
        zipped_url (str): url which must be either a .zip or a .tar.gz file
        ensure (bool): eager evaluation if True (default = True)
        appname (str): (default = 'utool')
        download_dir (str): directory to download into (default = None)
        force_commonprefix (bool): (default = True)
        cleanup (bool): (default = False)
        redownload (bool): (default = False)
        spoof (bool): (default = False)

    CommandLine:
        python -m utool.util_grabdata --exec-grab_zipped_url --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_grabdata import *  # NOQA
        >>> import utool as ut
        >>> zipped_url = '?'
        >>> ensure = True
        >>> appname = 'utool'
        >>> download_dir = None
        >>> force_commonprefix = True
        >>> cleanup = False
        >>> redownload = False
        >>> spoof = False
        >>> result = grab_zipped_url(zipped_url, ensure, appname, download_dir,
        >>>                          force_commonprefix, cleanup, redownload,
        >>>                          spoof)
        >>> print(result)

    Examples:
        >>> from utool.util_grabdata import *  # NOQA
        >>> zipped_url = 'https://lev.cs.rpi.edu/public/data/testdata.zip'
        >>> zipped_url = 'http://www.spam.com/eggs/data.zip'
    """
    zipped_url = clean_dropbox_link(zipped_url)
    zip_fname = split(zipped_url)[1]
    data_name = split_archive_ext(zip_fname)[0]
    # Download zipfile to the app resource dir by default
    if download_dir is None:
        download_dir = util_cplat.get_app_resource_dir(appname)
    # Zipfile should unzip to:
    data_dir = join(download_dir, data_name)
    if ensure or redownload:
        if redownload:
            util_path.remove_dirs(data_dir)
        util_path.ensurepath(download_dir)
        if not exists(data_dir) or redownload:
            # Download and unzip the archive
            zip_fpath = realpath(join(download_dir, zip_fname))
            #print('[utool] Downloading archive %s' % zip_fpath)
            if not exists(zip_fpath) or redownload:
                download_url(zipped_url, zip_fpath, spoof=spoof)
            unarchive_file(zip_fpath, force_commonprefix)
            if cleanup:
                util_path.delete(zip_fpath)  # Cleanup
    if cleanup:
        util_path.assert_exists(data_dir)
    return util_path.unixpath(data_dir)
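
# Hedged usage sketch (not part of the original module): grabs a zip archive
# and unpacks it under the utool app resource dir. The helper name
# _demo_grab_zipped_url is hypothetical, and the URL is the placeholder from
# the docstring above, not a verified download location.
def _demo_grab_zipped_url():
    import utool as ut
    data_dir = ut.grab_zipped_url(
        'https://lev.cs.rpi.edu/public/data/testdata.zip',
        appname='utool',
        cleanup=True,   # delete the downloaded .zip after unarchiving
    )
    return data_dir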