def test_download_bad_url():
    """
    Check that we error when the url is bad

    CommandLine:
        python -m ubelt.tests.test_download test_download_bad_url --verbose
    """
    # The six dependency was only used to locate URLError across py2/py3.
    # This file already uses py3-only syntax elsewhere (f-strings), so
    # import it directly from its canonical py3 location.
    from urllib.error import URLError  # NOQA

    url = 'http://a-very-incorrect-url'
    if not ub.argflag('--network'):
        pytest.skip('not running network tests')

    dpath = ub.ensure_app_cache_dir('ubelt', 'tests')
    fname = basename(url)
    fpath = join(dpath, fname)

    # Start from a clean slate so the download is actually attempted
    ub.delete(fpath)
    assert not exists(fpath)

    with pytest.raises(URLError):
        ub.download(url, fpath=fpath, verbose=1)
def test_download_bad_url():
    """
    Check that we error when the url is bad

    Notes:
        For some reason this can take a long time to realize there is no
        URL, even if the timeout is specified and fairly low.

    CommandLine:
        python tests/test_download.py test_download_bad_url --verbose
    """
    # The sys.version_info branch only existed to find URLError on py2;
    # this file already uses py3-only syntax (f-strings), so the dead
    # branch is removed and the canonical py3 import is used directly.
    from urllib.error import URLError  # NOQA

    url = 'http://www.a-very-incorrect-url.gov/does_not_exist.txt'
    # if not ub.argflag('--network'):
    #     pytest.skip('not running network tests')

    dpath = ub.ensure_app_cache_dir('ubelt', 'tests')
    fname = basename(url)
    fpath = join(dpath, fname)

    # Start from a clean slate so the download is actually attempted
    ub.delete(fpath)
    assert not exists(fpath)

    with pytest.raises(URLError):
        ub.download(url, fpath=fpath, verbose=1, timeout=1.0)
def have_gov_certs():
    """
    Return True if this machine can fetch from gwg.nga.mil (i.e. has the
    required certificates installed), False if the request errors.
    """
    # URLError canonically lives in urllib.error; urllib.request only
    # happens to re-export it, which is not a documented API.
    from urllib.error import URLError
    try:
        # Test to see if we have certs
        ub.download(
            'https://gwg.nga.mil/ntb/baseline/software/testfile/Nitfv2_1/scen_2_1.html'
        )
        return True
    except URLError:
        return False
def test_download_bad_url():
    """
    Check that downloading from an unresolvable url raises an error.

    (The previous docstring incorrectly described the no-fpath case; this
    test passes an explicit fpath and asserts the download fails.)
    """
    url = 'http://averyincorrecturl'

    dpath = ub.ensure_app_cache_dir('ubelt', 'tests')
    fname = basename(url)
    fpath = join(dpath, fname)

    # Start from a clean slate so the download is actually attempted
    ub.delete(fpath)
    assert not exists(fpath)

    with pytest.raises(Exception):
        ub.download(url, fpath=fpath)
def test_download_cover_hashers():
    """
    Exercise the string-specified hasher code paths of ub.download.
    """
    if not ub.argflag('--network'):
        pytest.skip('not running network tests')

    url = 'http://i.imgur.com/rqwaDag.png'
    cache_dpath = ub.ensure_app_cache_dir('ubelt')
    name = basename(url)

    # Each call covers a different string-named hasher backend
    ub.download(url, hasher='md5', hash_prefix='545e3a51404f664e46aa65',
                dpath=cache_dpath, fname=name)
    ub.download(url, hasher='sha256', hash_prefix='31a129618c87dd667103',
                dpath=cache_dpath, fname=name)
def main():
    """
    Checks that the latest wheels on pypi agree with the gpg key.

    Downloads each wheel of the newest release, fetches its detached
    signature, and verifies it with gpg.

    Raises:
        ValueError: if pypi reports a wheel has no signature
        AssertionError: if gpg verification fails
    """
    import requests
    package_name = 'ubelt'
    url = "https://pypi.python.org/pypi/{}/json".format(package_name)
    package = requests.get(url).json()

    def _release_key(ver):
        # A plain max() over version strings compares lexicographically
        # ('0.9' > '0.10'); compare numeric components as integers and
        # fall back to string comparison for non-numeric parts.
        parts = []
        for part in ver.split('.'):
            try:
                parts.append((0, int(part), ''))
            except ValueError:
                parts.append((1, 0, part))
        return parts

    max_ver = max(package["releases"].keys(), key=_release_key)
    # ... check compatibility
    latest_wheel_info_list = package['releases'][max_ver]
    for wheel_info in latest_wheel_info_list:
        import ubelt as ub
        whl_fpath = ub.grabdata(
            wheel_info['url'],
            hash_prefix=wheel_info['digests']['sha256'],
            hasher='sha256')
        if not wheel_info['has_sig']:
            raise ValueError('info says no sig')
        # Detached signatures live next to the wheel with an .asc suffix
        sig_fpath = ub.download(wheel_info['url'] + '.asc')
        info = ub.cmd('gpg --verify {} {}'.format(sig_fpath, whl_fpath),
                      verbose=3)
        assert info['ret'] == 0
def test_download_with_progkw():
    """
    Test that progkw is properly passed through to ub.download
    """
    url = _demo_url(128 * 10)
    cache_dpath = ub.ensure_app_cache_dir('ubelt', 'tests')
    target = join(cache_dpath, basename(url))
    progkw = {'verbose': 3, 'freq': 1, 'adjust': False}
    with ub.CaptureStdout() as cap:
        ub.download(url, fpath=target, progkw=progkw, chunksize=128)
    # verbose progress at freq=1 should emit many output lines
    assert len(cap.text.split('\n')) > 10
def download_metadata_file(url, outputdir, program):
    """Download and unzip the catalogue files."""
    if outputdir is None:
        outputdir = FELS_DEFAULT_OUTPUTDIR
    zipped_index_path = os.path.join(outputdir, 'index_' + program + '.csv.gz')
    index_path = os.path.join(outputdir, 'index_' + program + '.csv')

    # Already unzipped on a previous run: nothing to do
    if os.path.isfile(index_path):
        return index_path

    if not os.path.isfile(zipped_index_path):
        archive_dir = os.path.dirname(zipped_index_path)
        if not os.path.exists(archive_dir):
            os.makedirs(archive_dir)
        print('Downloading Metadata file...')
        print('url = {!r}'.format(url))
        print('outputdir = {!r}'.format(outputdir))
        ubelt.download(url, fpath=zipped_index_path, chunksize=int(2**22))

    print('Unzipping Metadata file...')
    with gzip.open(zipped_index_path) as gzip_index, open(
            index_path, 'wb') as f:
        shutil.copyfileobj(gzip_index, f)
    ubelt.delete(zipped_index_path)  # remove archive file
    return index_path
def test_download_with_io():
    """Downloading into an existing file-like object returns that object."""
    import ubelt as ub
    import io
    url = _demo_url(128 * 3)
    buffer = io.BytesIO()
    result = ub.download(url, buffer)
    assert result is buffer
    buffer.seek(0)
    digest = ub.hash_data(buffer.read(), hasher='sha1')
    assert digest.startswith('45a5c851bf12d1')
def test_download_cover_hashers():
    """Cover the md5 and sha256 string-named hasher code paths."""
    url = _demo_url()
    cache_dpath = ub.ensure_app_cache_dir('ubelt')
    name = basename(url)
    # (hasher name, expected hash prefix) pairs to exercise
    hasher_specs = [
        ('md5', 'e09c80c42fda55f9d992e59ca6b33'),
        ('sha256', 'bf2cb58a68f684d95a3b78ef8f'),
    ]
    for hasher, prefix in hasher_specs:
        ub.download(url, hasher=hasher, hash_prefix=prefix,
                    dpath=cache_dpath, fname=name)
def test_download_no_fpath():
    """When fpath is unspecified the download lands in the app cache."""
    url = 'http://i.imgur.com/rqwaDag.png'
    expected = join(ub.ensure_app_cache_dir('ubelt'), basename(url))
    # Remove any cached copy so the path is created by this call
    ub.delete(expected)
    assert not exists(expected)
    result = ub.download(url)
    assert result == expected
    assert exists(expected)
def test_download_chunksize():
    """The download should still succeed with a tiny chunksize."""
    url = 'http://i.imgur.com/rqwaDag.png'
    expected = join(ub.ensure_app_cache_dir('ubelt'), basename(url))
    # Remove any cached copy so the path is created by this call
    ub.delete(expected)
    assert not exists(expected)
    result = ub.download(url, chunksize=2)
    assert result == expected
    assert exists(expected)
def test_download_no_fpath():
    """When fpath is unspecified the download lands in the app cache."""
    url = _demo_url()
    expected = join(ub.ensure_app_cache_dir('ubelt'), basename(url))
    # Remove any cached copy so the path is created by this call
    ub.delete(expected)
    assert not exists(expected)
    result = ub.download(url)
    assert result == expected
    assert exists(expected)
def test_download_with_fpath():
    """An explicit fpath is respected and receives the payload."""
    url = 'http://i.imgur.com/rqwaDag.png'
    target = join(ub.ensure_app_cache_dir('ubelt', 'tests'), basename(url))
    # Remove any cached copy so the path is created by this call
    ub.delete(target)
    assert not exists(target)
    result = ub.download(url, fpath=target)
    assert result == target
    assert exists(target)
    with open(result, 'rb') as file:
        payload = file.read()
    assert len(payload) > 1200, 'should have downloaded some bytes'
def test_download_chunksize():
    """The download should still succeed with a tiny chunksize."""
    url = _demo_url()
    expected = join(ub.ensure_app_cache_dir('ubelt'), basename(url))
    # Remove any cached copy so the path is created by this call
    ub.delete(expected)
    assert not exists(expected)
    result = ub.download(url, chunksize=2)
    assert result == expected
    assert exists(expected)
def test_download_with_fpath():
    """An explicit fpath is respected and receives the payload."""
    url = _demo_url(1201)
    target = join(ub.ensure_app_cache_dir('ubelt', 'tests'), basename(url))
    # Remove any cached copy so the path is created by this call
    ub.delete(target)
    assert not exists(target)
    result = ub.download(url, fpath=target)
    assert result == target
    assert exists(target)
    with open(result, 'rb') as file:
        payload = file.read()
    assert len(payload) > 1200, 'should have downloaded some bytes'
def test_download_hashalgo():
    """Verify download with an explicit hashlib hasher instance."""
    import hashlib
    url = 'http://i.imgur.com/rqwaDag.png'
    if not ub.argflag('--network'):
        pytest.skip('not running network tests')
    target = join(ub.ensure_app_cache_dir('ubelt'), basename(url))
    # Remove any cached copy so the path is created by this call
    ub.delete(target)
    assert not exists(target)
    result = ub.download(url,
                         hash_prefix='545e3a51404f664e46aa65a70948e126',
                         hasher=hashlib.md5())
    assert result == target
    assert exists(target)
def test_download_hashalgo():
    """Verify download with an explicit hashlib hasher instance."""
    import hashlib
    url = _demo_url()
    target = join(ub.ensure_app_cache_dir('ubelt'), basename(url))
    # Remove any cached copy so the path is created by this call
    ub.delete(target)
    assert not exists(target)
    result = ub.download(url,
                         hash_prefix='e09c80c42fda55f9d992e59ca6b3307d',
                         hasher=hashlib.md5())
    assert result == target
    assert exists(target)
def test_download_with_sha1_hasher():
    """Cover the hasher='sha1' string specification."""
    import ubelt as ub
    demo_url = _demo_url(128 * 4)
    ub.download(demo_url, hasher='sha1', hash_prefix='164557facb7392')
def test_local_download():
    """Download a ~10MB file served by the local singleton test server."""
    server = SingletonTestServer.instance()
    nbytes = int(10 * 2 ** 20)
    url = server.write_file(filebytes=nbytes)[0]
    ub.download(url)
import pandas as pd from dateutil import parser import datetime import pathlib sns = kwplot.autosns() repo_fpath = pathlib.Path('/data/joncrall/COVID-19/csse_covid_19_data') data = pd.read_csv( '/home/joncrall/Downloads/Provisional_COVID-19_Deaths_by_Sex_and_Age.csv') # data[data['Province_State'] == 'Florida'] url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv' file = io.BytesIO() ub.download(url, fpath=file) file.seek(0) data = pd.read_csv(file) sub = data[[ 'Start Date', 'End Date', 'Age Group', 'COVID-19 Deaths', 'State', 'Sex' ]] flags1 = (sub['State'] == 'Florida') | (sub['State'] == 'New York') flags1 &= sub['Sex'] == 'All Sexes' sub2 = sub[flags1] flags3 = ( (sub2['Age Group'] != 'All Ages') & (sub2['Age Group'] != 'Under 1 year') & (sub2['Age Group'] != '1-4 years') & (sub2['Age Group'] != '5-14 years') & (sub2['Age Group'] != '15-24 years') & (sub2['Age Group'] != '0-17 years') & (sub2['Age Group'] != '25-34 years') &
def 下载文件(url, 保存文件路径=None):
    """
    Download ``url`` (to ``保存文件路径`` when given, otherwise the default
    cache location) and return the resulting path with the user's home
    directory shrunk to ``~``.
    """
    downloaded = ub.download(url, fpath=保存文件路径, verbose=0)
    return ub.shrinkuser(downloaded)
def test_local_download():
    """Download from the local test server, passing a timeout for coverage."""
    server = SingletonTestServer.instance()
    nbytes = int(10 * 2 ** 20)
    url = server.write_file(filebytes=nbytes)[0]
    # also test with a timeout for lazy coverage
    ub.download(url, timeout=1000)
def get_sentinel2_image(url, outputdir, overwrite=False, partial=False,
                        noinspire=False, reject_old=False):
    """
    Collect the entire dir structure of the image files from the manifest.safe
    file and build the same structure in the output location.

    Returns:
        True if image was downloaded
        False if partial=False and image was not fully downloaded
            or if reject_old=True and it is old-format
            or if noinspire=False and INSPIRE file is missing
    """
    img = os.path.basename(url)
    target_path = os.path.join(outputdir, img)
    target_manifest = os.path.join(target_path, 'manifest.safe')

    return_status = True
    if not os.path.exists(target_path) or overwrite:

        manifest_url = url + '/manifest.safe'

        if reject_old:
            # check contents of manifest before downloading the rest
            content = urlopen(manifest_url)
            with NamedTemporaryFile() as f:
                shutil.copyfileobj(content, f)
                if not is_new(f.name):
                    return False

        os.makedirs(target_path, exist_ok=True)

        # Save the manifest itself into the target directory
        content = urlopen(manifest_url)
        with open(target_manifest, 'wb') as f:
            shutil.copyfileobj(content, f)

        # Whitespace-split tokens of the manifest; href attributes carry
        # the relative paths of every file in the product
        with open(target_manifest, 'r') as manifest_file:
            manifest_lines = manifest_file.read().split()
        for line in manifest_lines:
            if 'href' in line:
                # Extract the path between href=". and the closing quote
                rel_path = line[line.find('href=".') + 7:]
                rel_path = rel_path[:rel_path.find('"')]
                abs_path = os.path.join(target_path, *rel_path.split('/')[1:])
                if not os.path.exists(os.path.dirname(abs_path)):
                    os.makedirs(os.path.dirname(abs_path))
                try:
                    ubelt.download(url + rel_path, fpath=abs_path)
                except HTTPError as error:
                    # Best-effort: report the failed file and keep going
                    print('Error downloading {} [{}]'.format(
                        url + rel_path, error))
                    continue

        # Ensure the standard empty directories exist at both the product
        # level and the granule level
        granule = os.path.dirname(
            os.path.dirname(get_S2_image_bands(target_path, 'B01')))
        for extra_dir in ('AUX_DATA', 'HTML'):
            if not os.path.exists(os.path.join(target_path, extra_dir)):
                os.makedirs(os.path.join(target_path, extra_dir))
            if not os.path.exists(os.path.join(granule, extra_dir)):
                os.makedirs(os.path.join(granule, extra_dir))

        # NOTE(review): prints an empty line when the manifest had no
        # tokens — purpose unclear; possibly a leftover debug statement.
        if not manifest_lines:
            print()
    elif reject_old and not is_new(target_manifest):
        print(f'Warning: old-format image {outputdir} exists')
        return_status = False

    if partial:
        # Drop incomplete tiles so callers do not see a half-downloaded dir
        tile_chk = check_full_tile(get_S2_image_bands(target_path, 'B01'))
        if tile_chk == 'Partial':
            print('Removing partial tile image files...')
            shutil.rmtree(target_path)
            return_status = False

    if not noinspire:
        inspire_file = os.path.join(target_path, 'INSPIRE.xml')
        if os.path.isfile(inspire_file):
            inspire_path = get_S2_INSPIRE_title(inspire_file)
            if os.path.basename(target_path) != inspire_path:
                # NOTE(review): inspire_path is not joined with outputdir,
                # so this rename target is relative to the CWD — verify
                # this is the intended destination.
                os.rename(target_path, inspire_path)
        else:
            print(f"File {inspire_file} could not be found.")
            return_status = False

    return return_status