def test_download_all_with_pooch():
    """Check ``data.download_all`` with and without an image fetcher.

    If ``image_fetcher`` is ``None``, ``data.download_all()`` must raise
    ``ModuleNotFoundError``. Otherwise the download is run and
    ``data.data_dir`` must afterwards list more than 50 entries.
    """
    # History: jni first wrote this test with the intention of fully
    # deleting the files in the data_dir and then ensuring that the data
    # gets downloaded accordingly.
    #
    # hmaarrfk raised two concerns:
    #   1. Such a test wouldn't play well with parallel testing, since it
    #      may break the global state that certain other tests require.
    #   2. It essentially uses a lot of bandwidth, which is not fun for
    #      developers on lower speed connections.
    #
    # https://github.com/scikit-image/scikit-image/pull/4666/files/26d5138b25b958da6e97ebf979e9bc36f32c3568#r422604863
    data_dir = data.data_dir

    # NOTE(review): a missing fetcher presumably means pooch is not
    # installed -- confirm against where image_fetcher is created.
    if image_fetcher is None:
        with pytest.raises(ModuleNotFoundError):
            data.download_all()
        return

    data.download_all()
    downloaded_entries = os.listdir(data_dir)
    assert len(downloaded_entries) > 50
def main():
    """Time the image-loading strategies on the scikit-image sample files.

    Downloads all sample data, gathers the paths of every registry entry
    ending in ``.tif``, ``.png`` or ``.jpg``, repeats that path list 15
    times, and then times ``load_concurrent``, ``load_asyncio_pure_python``,
    ``load_asyncio_with_uvloop`` and ``load_serial`` (in that order) with a
    shared ``timerit.Timerit`` instance.
    """
    from os.path import join

    import timerit
    from skimage import data as skimage_data
    from skimage.data import image_fetcher

    skimage_data.download_all()

    image_suffixes = ('.tif', '.png', '.jpg')
    fpaths = []
    for fname in image_fetcher.registry.keys():
        if fname.endswith(image_suffixes):
            fpaths.append(join(image_fetcher.path, fname))

    # Load a lot of files: repeat the path list so each run has more work.
    fpaths = fpaths * 15

    def report_counts(loader):
        # Each loader result is unpacked as (count, image) pairs; only the
        # counts are printed.
        counts, _images = zip(*loader(fpaths))
        print('counts = {!r}'.format(counts))

    # Manual sanity check, disabled by default; flip the flag to enable it.
    run_sanity_check = False
    if run_sanity_check:
        report_counts(load_serial)
        report_counts(load_concurrent)
        report_counts(load_asyncio_pure_python)

    # NOTE(review): presumably 50 timed runs reporting the best of 3 --
    # confirm against the timerit documentation.
    ti = timerit.Timerit(50, bestof=3, verbose=1)

    def benchmark(label, loader):
        # list() exhausts the loader's result inside the timed region.
        for timer in ti.reset(label):
            with timer:
                list(loader(fpaths))

    benchmark('concurrent', load_concurrent)
    benchmark('load_asyncio_pure_python', load_asyncio_pure_python)
    benchmark('load_asyncio_with_uvloop', load_asyncio_with_uvloop)
    benchmark('serial', load_serial)
def test_download_all():
    """Empty ``data.data_dir``, re-download everything, and count the result.

    Every entry currently listed in ``data.data_dir`` is removed, then
    ``data.download_all()`` is called and the directory must list more
    than 50 entries.
    """
    target_dir = data.data_dir

    # NOTE(review): os.remove is applied to every listed entry, so this
    # presumably assumes the directory holds only regular files -- confirm.
    stale_paths = [
        os.path.join(target_dir, entry) for entry in os.listdir(target_dir)
    ]
    for stale_path in stale_paths:
        os.remove(stale_path)

    data.download_all()

    entry_count = len(os.listdir(target_dir))
    assert entry_count > 50