def test_pool_with_memmap_array_view(tmpdir):
    """Check that subprocess can access and update shared memory array"""
    assert_array_equal = np.testing.assert_array_equal

    # Fork the subprocess before allocating the objects to be passed
    pool_temp_folder = tmpdir.mkdir('pool').strpath
    p = MemmapingPool(10, max_nbytes=2, temp_folder=pool_temp_folder)
    try:
        filename = tmpdir.join('test.mmap').strpath
        a = np.memmap(filename, dtype=np.float32, shape=(3, 5), mode='w+')
        a.fill(1.0)

        # Create an ndarray view on the memmap instance
        a_view = np.asarray(a)
        assert not isinstance(a_view, np.memmap)
        assert has_shareable_memory(a_view)

        p.map(inplace_double, [(a_view, (i, j), 1.0)
                               for i in range(a.shape[0])
                               for j in range(a.shape[1])])

        # Both a and the a_view have been updated
        assert_array_equal(a, 2 * np.ones(a.shape))
        assert_array_equal(a_view, 2 * np.ones(a.shape))

        # Passing memmap array view to the pool should not trigger the
        # creation of new files on the FS
        assert os.listdir(pool_temp_folder) == []
    finally:
        p.terminate()
        del p
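# Note: several tests in this listing call small helpers (`inplace_double`,
# `check_array`, `has_shareable_memory`) that are defined elsewhere in the
# joblib test suite. The sketch below is an assumed, minimal version of the
# first two, reconstructed only from how the tests invoke them (and relying on
# the module-level `np` import used throughout this file); it is not
# necessarily the exact upstream implementation.
def check_array(args):
    """Check that the value stored at `position` in `data` equals `expected`."""
    data, position, expected = args
    np.testing.assert_array_equal(data[position], expected)


def inplace_double(args):
    """Double the value at `position` in `data`, after checking `expected`."""
    data, position, expected = args
    assert data[position] == expected
    data[position] *= 2
    np.testing.assert_array_equal(data[position], 2 * expected)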
def test_memmaping_on_dev_shm():
    """Check that MemmapingPool uses /dev/shm when possible"""
    p = MemmapingPool(3, max_nbytes=10)
    try:
        # Check that the pool has correctly detected the presence of the
        # shared memory filesystem.
        pool_temp_folder = p._temp_folder
        folder_prefix = '/dev/shm/joblib_memmaping_pool_'
        assert_true(pool_temp_folder.startswith(folder_prefix))
        assert_true(os.path.exists(pool_temp_folder))

        # Try with a file larger than the memmap threshold of 10 bytes
        a = np.ones(100, dtype=np.float64)
        assert_equal(a.nbytes, 800)
        p.map(id, [a] * 10)

        # a should have been memmaped to the pool temp folder: the joblib
        # pickling procedure generates a .pkl and a .npy file:
        assert_equal(len(os.listdir(pool_temp_folder)), 2)

        b = np.ones(100, dtype=np.float64)
        assert_equal(b.nbytes, 800)
        p.map(id, [b] * 10)

        # A copy of both a and b is now stored in the shared memory folder
        assert_equal(len(os.listdir(pool_temp_folder)), 4)
    finally:
        # Cleanup open file descriptors
        p.terminate()
        del p

    # The temp folder is cleaned up upon pool termination
    assert_false(os.path.exists(pool_temp_folder))
def test_memmaping_on_dev_shm():
    """Check that MemmapingPool uses /dev/shm when possible"""
    p = MemmapingPool(3, max_nbytes=10)
    try:
        # Check that the pool has correctly detected the presence of the
        # shared memory filesystem.
        pool_temp_folder = p._temp_folder
        folder_prefix = '/dev/shm/joblib_memmaping_pool_'
        assert pool_temp_folder.startswith(folder_prefix)
        assert os.path.exists(pool_temp_folder)

        # Try with a file larger than the memmap threshold of 10 bytes
        a = np.ones(100, dtype=np.float64)
        assert a.nbytes == 800
        p.map(id, [a] * 10)

        # a should have been memmaped to the pool temp folder: the joblib
        # pickling procedure generates one .pkl file:
        assert len(os.listdir(pool_temp_folder)) == 1

        # Create a new array with content that is different from 'a' so that
        # it is mapped to a different file in the temporary folder of the
        # pool.
        b = np.ones(100, dtype=np.float64) * 2
        assert b.nbytes == 800
        p.map(id, [b] * 10)

        # A copy of both a and b is now stored in the shared memory folder
        assert len(os.listdir(pool_temp_folder)) == 2
    finally:
        # Cleanup open file descriptors
        p.terminate()
        del p

    # The temp folder is cleaned up upon pool termination
    assert not os.path.exists(pool_temp_folder)
def test_weak_array_key_map():

    def assert_empty_after_gc_collect(container, retries=3):
        for i in range(retries):
            if len(container) == 0:
                return
            gc.collect()
        assert len(container) == 0

    a = np.ones(42)
    m = _WeakArrayKeyMap()
    m.set(a, 'a')
    assert m.get(a) == 'a'

    b = a
    assert m.get(b) == 'a'
    m.set(b, 'b')
    assert m.get(a) == 'b'

    del a
    gc.collect()
    assert len(m._data) == 1
    assert m.get(b) == 'b'

    del b
    assert_empty_after_gc_collect(m._data)

    c = np.ones(42)
    m.set(c, 'c')
    assert len(m._data) == 1
    assert m.get(c) == 'c'

    with raises(KeyError):
        m.get(np.ones(42))

    del c
    assert_empty_after_gc_collect(m._data)

    # Check that creating and dropping numpy arrays with potentially the same
    # object id will not cause the map to get confused.
    def get_set_get_collect(m, i):
        a = np.ones(42)
        with raises(KeyError):
            m.get(a)
        m.set(a, i)
        assert m.get(a) == i
        return id(a)

    unique_ids = set([get_set_get_collect(m, i) for i in range(1000)])
    if platform.python_implementation() == 'CPython':
        # On CPython (at least) the same id is often reused many times for the
        # temporary arrays created under the local scope of the
        # get_set_get_collect function without causing any spurious lookups /
        # insertions in the map.
        assert len(unique_ids) < 100
def test_pool_with_memmap(tmpdir_path):
    """Check that subprocess can access and update shared memory memmap"""
    assert_array_equal = np.testing.assert_array_equal

    # Fork the subprocess before allocating the objects to be passed
    pool_temp_folder = os.path.join(tmpdir_path, 'pool')
    os.makedirs(pool_temp_folder)
    p = MemmapingPool(10, max_nbytes=2, temp_folder=pool_temp_folder)
    try:
        filename = os.path.join(tmpdir_path, 'test.mmap')
        a = np.memmap(filename, dtype=np.float32, shape=(3, 5), mode='w+')
        a.fill(1.0)

        p.map(inplace_double, [(a, (i, j), 1.0)
                               for i in range(a.shape[0])
                               for j in range(a.shape[1])])

        assert_array_equal(a, 2 * np.ones(a.shape))

        # Open a copy-on-write view on the previous data
        b = np.memmap(filename, dtype=np.float32, shape=(5, 3), mode='c')

        p.map(inplace_double, [(b, (i, j), 2.0)
                               for i in range(b.shape[0])
                               for j in range(b.shape[1])])

        # Passing memmap instances to the pool should not trigger the creation
        # of new files on the FS
        assert os.listdir(pool_temp_folder) == []

        # the original data is untouched
        assert_array_equal(a, 2 * np.ones(a.shape))
        assert_array_equal(b, 2 * np.ones(b.shape))

        # readonly maps can be read but not updated
        c = np.memmap(filename, dtype=np.float32, shape=(10,), mode='r',
                      offset=5 * 4)

        assert_raises(AssertionError, p.map, check_array,
                      [(c, i, 3.0) for i in range(c.shape[0])])

        # depending on the version of numpy one can either get a RuntimeError
        # or a ValueError
        assert_raises((RuntimeError, ValueError), p.map, inplace_double,
                      [(c, i, 2.0) for i in range(c.shape[0])])
    finally:
        # Clean all filehandlers held by the pool
        p.terminate()
        del p
def test_numpy_persistence_bufferred_array_compression():
    big_array = np.ones((_IO_BUFFER_SIZE + 100), dtype=np.uint8)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(big_array, filename, compress=True)
    arr_reloaded = numpy_pickle.load(filename)

    np.testing.assert_array_equal(big_array, arr_reloaded)
def test_joblib_compression_formats():
    compresslevels = (1, 3, 6)
    filename = env['filename'] + str(random.randint(0, 1000))
    objects = (np.ones(shape=(100, 100), dtype='f8'),
               range(10),
               {'a': 1, 2: 'b'}, [], (), {}, 0, 1.0)

    for compress in compresslevels:
        for cmethod in _COMPRESSORS:
            dump_filename = filename + "." + cmethod
            for obj in objects:
                if not PY3_OR_LATER and cmethod in ('xz', 'lzma'):
                    # Lzma module only available for python >= 3.3
                    msg = "{} compression is only available".format(cmethod)
                    assert_raises_regex(NotImplementedError, msg,
                                        numpy_pickle.dump, obj, dump_filename,
                                        compress=(cmethod, compress))
                else:
                    numpy_pickle.dump(obj, dump_filename,
                                      compress=(cmethod, compress))
                    # Verify the file contains the right magic number
                    with open(dump_filename, 'rb') as f:
                        assert _detect_compressor(f) == cmethod

                    # Verify the reloaded object is correct
                    obj_reloaded = numpy_pickle.load(dump_filename)
                    assert isinstance(obj_reloaded, type(obj))
                    if isinstance(obj, np.ndarray):
                        np.testing.assert_array_equal(obj_reloaded, obj)
                    else:
                        assert obj_reloaded == obj
                    os.remove(dump_filename)
def get_set_get_collect(m, i):
    a = np.ones(42)
    with raises(KeyError):
        m.get(a)
    m.set(a, i)
    assert m.get(a) == i
    return id(a)
def test_memmaping_pool_for_large_arrays():
    """Check that large arrays are not copied in memory"""
    assert_array_equal = np.testing.assert_array_equal

    # Check that the tempfolder is empty
    assert_equal(os.listdir(TEMP_FOLDER), [])

    # Build array reducers that automatically dump large array content
    # to filesystem backed memmap instances to avoid memory explosion
    p = MemmapingPool(3, max_nbytes=40, temp_folder=TEMP_FOLDER)
    try:
        # The temporary folder for the pool is not provisioned in advance
        assert_equal(os.listdir(TEMP_FOLDER), [])
        assert_false(os.path.exists(p._temp_folder))

        small = np.ones(5, dtype=np.float32)
        assert_equal(small.nbytes, 20)
        p.map(check_array, [(small, i, 1.0) for i in range(small.shape[0])])

        # Memory has been copied, the pool filesystem folder is unused
        assert_equal(os.listdir(TEMP_FOLDER), [])

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert_equal(large.nbytes, 800)
        p.map(check_array, [(large, i, 1.0) for i in range(large.shape[0])])

        # The data has been dumped in a temp folder for subprocess to share it
        # without per-child memory copies
        assert_true(os.path.isdir(p._temp_folder))
        dumped_filenames = os.listdir(p._temp_folder)
        assert_equal(len(dumped_filenames), 2)

        # Check that memory mapping is not triggered for arrays with
        # dtype='object'
        objects = np.array(['abc'] * 100, dtype='object')
        results = p.map(has_shareable_memory, [objects])
        assert_false(results[0])
    finally:
        # check FS garbage upon pool termination
        p.terminate()
        assert_false(os.path.exists(p._temp_folder))
        del p
def test_memmapping_pool_for_large_arrays(factory, tmpdir):
    """Check that large arrays are not copied in memory"""
    # Check that the tempfolder is empty
    assert os.listdir(tmpdir.strpath) == []

    # Build array reducers that automatically dump large array content
    # to filesystem backed memmap instances to avoid memory explosion
    p = factory(3, max_nbytes=40, temp_folder=tmpdir.strpath, verbose=2)
    try:
        # The temporary folder for the pool is not provisioned in advance
        assert os.listdir(tmpdir.strpath) == []
        assert not os.path.exists(p._temp_folder)

        small = np.ones(5, dtype=np.float32)
        assert small.nbytes == 20
        p.map(check_array, [(small, i, 1.0) for i in range(small.shape[0])])

        # Memory has been copied, the pool filesystem folder is unused
        assert os.listdir(tmpdir.strpath) == []

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert large.nbytes == 800
        p.map(check_array, [(large, i, 1.0) for i in range(large.shape[0])])

        # The data has been dumped in a temp folder for subprocess to share it
        # without per-child memory copies
        assert os.path.isdir(p._temp_folder)
        dumped_filenames = os.listdir(p._temp_folder)
        assert len(dumped_filenames) == 1

        # Check that memory mapping is not triggered for arrays with
        # dtype='object'
        objects = np.array(['abc'] * 100, dtype='object')
        results = p.map(has_shareable_memory, [objects])
        assert not results[0]
    finally:
        # check FS garbage upon pool termination
        p.terminate()
        assert not os.path.exists(p._temp_folder)
        del p
def test_memmapping_on_large_enough_dev_shm(factory):
    """Check that memmapping uses /dev/shm when possible"""
    orig_size = jmr.SYSTEM_SHARED_MEM_FS_MIN_SIZE
    try:
        # Make joblib believe that it can use /dev/shm even when running on a
        # CI container where the size of the /dev/shm is not very large (that
        # is at least 32 MB instead of 2 GB by default).
        jmr.SYSTEM_SHARED_MEM_FS_MIN_SIZE = int(32e6)
        p = factory(3, max_nbytes=10)
        try:
            # Check that the pool has correctly detected the presence of the
            # shared memory filesystem.
            pool_temp_folder = p._temp_folder
            folder_prefix = '/dev/shm/joblib_memmapping_folder_'
            assert pool_temp_folder.startswith(folder_prefix)
            assert os.path.exists(pool_temp_folder)

            # Try with a file larger than the memmap threshold of 10 bytes
            a = np.ones(100, dtype=np.float64)
            assert a.nbytes == 800
            p.map(id, [a] * 10)

            # a should have been memmapped to the pool temp folder: the joblib
            # pickling procedure generates one .pkl file:
            assert len(os.listdir(pool_temp_folder)) == 1

            # Create a new array with content that is different from 'a' so
            # that it is mapped to a different file in the temporary folder of
            # the pool.
            b = np.ones(100, dtype=np.float64) * 2
            assert b.nbytes == 800
            p.map(id, [b] * 10)

            # A copy of both a and b is now stored in the shared memory folder
            assert len(os.listdir(pool_temp_folder)) == 2
        finally:
            # Cleanup open file descriptors
            p.terminate()
            del p

        # The temp folder is cleaned up upon pool termination
        assert not os.path.exists(pool_temp_folder)
    finally:
        jmr.SYSTEM_SHARED_MEM_FS_MIN_SIZE = orig_size
def test_non_contiguous_array_pickling(tmpdir):
    filename = tmpdir.join('test.pkl').strpath

    for array in [
            # Array that triggers a contiguousness issue with nditer,
            # see https://github.com/joblib/joblib/pull/352 and see
            # https://github.com/joblib/joblib/pull/353
            np.asfortranarray([[1, 2], [3, 4]])[1:],
            # Non contiguous array that works fine with nditer
            np.ones((10, 50, 20), order='F')[:, :1, :]
    ]:
        assert not array.flags.c_contiguous
        assert not array.flags.f_contiguous
        numpy_pickle.dump(array, filename)
        array_reloaded = numpy_pickle.load(filename)
        np.testing.assert_array_equal(array_reloaded, array)
def test_memory_usage():
    # Verify memory stays within expected bounds.
    filename = env['filename']
    small_array = np.ones((10, 10))
    big_array = np.ones(shape=100 * int(1e6), dtype=np.uint8)
    small_matrix = np.matrix(small_array)
    big_matrix = np.matrix(big_array)
    for compress in (True, False):
        for obj in (small_array, big_array, small_matrix, big_matrix):
            size = obj.nbytes / 1e6
            obj_filename = filename + str(np.random.randint(0, 1000))
            mem_used = memory_used(numpy_pickle.dump,
                                   obj, obj_filename, compress=compress)

            # The memory used to dump the object shouldn't exceed the buffer
            # size used to write array chunks (16MB).
            write_buf_size = _IO_BUFFER_SIZE + 16 * 1024 ** 2 / 1e6
            assert mem_used <= write_buf_size

            mem_used = memory_used(numpy_pickle.load, obj_filename)
            # memory used should be less than array size + buffer size used to
            # read the array chunk by chunk.
            read_buf_size = 32 + _IO_BUFFER_SIZE  # MiB
            assert mem_used < size + read_buf_size
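# `memory_used` is a measurement helper from the joblib test utilities and is
# not defined in this listing. The sketch below is one plausible version,
# assumed from how the tests call it: it samples the process memory while
# `func` runs (via the memory_profiler package, an assumption) and returns the
# peak increase in MB.
import gc

from memory_profiler import memory_usage


def memory_used(func, *args, **kwargs):
    """Return the peak memory increase (in MB) observed while running func."""
    gc.collect()
    mem_use = memory_usage((func, args, kwargs), interval=.001)
    return max(mem_use) - min(mem_use)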
def test_non_contiguous_array_pickling():
    filename = env['filename'] + str(random.randint(0, 1000))

    for array in [
            # Array that triggers a contiguousness issue with nditer,
            # see https://github.com/joblib/joblib/pull/352 and see
            # https://github.com/joblib/joblib/pull/353
            np.asfortranarray([[1, 2], [3, 4]])[1:],
            # Non contiguous array that works fine with nditer
            np.ones((10, 50, 20), order='F')[:, :1, :]]:
        assert not array.flags.c_contiguous
        assert not array.flags.f_contiguous
        numpy_pickle.dump(array, filename)
        array_reloaded = numpy_pickle.load(filename)
        np.testing.assert_array_equal(array_reloaded, array)
        os.remove(filename)
def test_memory_usage(tmpdir, compress):
    # Verify memory stays within expected bounds.
    filename = tmpdir.join('test.pkl').strpath
    small_array = np.ones((10, 10))
    big_array = np.ones(shape=100 * int(1e6), dtype=np.uint8)
    small_matrix = np.matrix(small_array)
    big_matrix = np.matrix(big_array)

    for obj in (small_array, big_array, small_matrix, big_matrix):
        size = obj.nbytes / 1e6
        obj_filename = filename + str(np.random.randint(0, 1000))
        mem_used = memory_used(numpy_pickle.dump,
                               obj, obj_filename, compress=compress)

        # The memory used to dump the object shouldn't exceed the buffer
        # size used to write array chunks (16MB).
        write_buf_size = _IO_BUFFER_SIZE + 16 * 1024 ** 2 / 1e6
        assert mem_used <= write_buf_size

        mem_used = memory_used(numpy_pickle.load, obj_filename)
        # memory used should be less than array size + buffer size used to
        # read the array chunk by chunk.
        read_buf_size = 32 + _IO_BUFFER_SIZE  # MiB
        assert mem_used < size + read_buf_size
def test_non_contiguous_array_pickling():
    filename = env['filename'] + str(random.randint(0, 1000))

    for array in [
            # Array that triggers a contiguousness issue with nditer,
            # see https://github.com/joblib/joblib/pull/352 and see
            # https://github.com/joblib/joblib/pull/353
            np.asfortranarray([[1, 2], [3, 4]])[1:],
            # Non contiguous array that works fine with nditer
            np.ones((10, 50, 20), order='F')[:, :1, :]
    ]:
        nose.tools.assert_false(array.flags.c_contiguous)
        nose.tools.assert_false(array.flags.f_contiguous)
        numpy_pickle.dump(array, filename)
        array_reloaded = numpy_pickle.load(filename)
        np.testing.assert_array_equal(array_reloaded, array)
        os.remove(filename)
def test_memory_numpy_check_mmap_mode(tmpdir):
    """Check that mmap_mode is respected even at the first call"""
    memory = Memory(location=tmpdir.strpath, mmap_mode='r', verbose=0)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    assert isinstance(c, np.memmap)
    assert c.mode == 'r'

    assert isinstance(b, np.memmap)
    assert b.mode == 'r'
def test_nested_parallelism_with_dask():
    distributed = pytest.importorskip('distributed')
    client = distributed.Client(n_workers=2, threads_per_worker=2)  # noqa

    # 10 MB of data as argument to trigger implicit scattering
    data = np.ones(int(1e7), dtype=np.uint8)
    for i in range(2):
        with parallel_backend('dask'):
            backend_types_and_levels = _recursive_backend_info(data=data)
        assert len(backend_types_and_levels) == 4
        assert all(name == 'DaskDistributedBackend'
                   for name, _ in backend_types_and_levels)

    # No argument
    with parallel_backend('dask'):
        backend_types_and_levels = _recursive_backend_info()
    assert len(backend_types_and_levels) == 4
    assert all(name == 'DaskDistributedBackend'
               for name, _ in backend_types_and_levels)
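# `_recursive_backend_info` is a helper defined elsewhere in the joblib test
# suite. The sketch below is an assumed reconstruction based only on how the
# test above uses it: it makes nested Parallel calls and returns one
# (backend_name, level) pair per nesting level, four levels deep. The real
# helper (and its use of the private `_backend` attribute) may differ.
def _recursive_backend_info(limit=3, data=None, level=0):
    """Perform nested Parallel calls and record the active backend per level."""
    with Parallel(n_jobs=2) as p:
        this_level = [(type(p._backend).__name__, level)]
        if limit == 0:
            return this_level
        sub_levels = p(delayed(_recursive_backend_info)(
            limit=limit - 1, data=data, level=level + 1) for _ in range(1))
        return this_level + sub_levels[0]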
def test_memory_numpy_check_mmap_mode(tmpdir):
    """Check that mmap_mode is respected even at the first call"""
    memory = Memory(cachedir=tmpdir.strpath, mmap_mode='r', verbose=0)
    memory.clear(warn=False)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    assert isinstance(c, np.memmap)
    assert c.mode == 'r'

    assert isinstance(b, np.memmap)
    assert b.mode == 'r'
def test_memory_numpy_check_mmap_mode():
    """Check that mmap_mode is respected even at the first call"""
    memory = Memory(cachedir=env["dir"], mmap_mode="r", verbose=0)
    memory.clear(warn=False)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    nose.tools.assert_true(isinstance(c, np.memmap))
    nose.tools.assert_equal(c.mode, "r")

    nose.tools.assert_true(isinstance(b, np.memmap))
    nose.tools.assert_equal(b.mode, "r")
def test_memory_numpy_check_mmap_mode():
    """Check that mmap_mode is respected even at the first call"""
    memory = Memory(cachedir=env['dir'], mmap_mode='r', verbose=0)
    memory.clear(warn=False)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    nose.tools.assert_true(isinstance(c, np.memmap))
    nose.tools.assert_equal(c.mode, 'r')

    nose.tools.assert_true(isinstance(b, np.memmap))
    nose.tools.assert_equal(b.mode, 'r')
def test_memmaping_pool_for_large_arrays_in_return(tmpdir):
    """Check that large arrays are not copied in memory in return"""
    assert_array_equal = np.testing.assert_array_equal

    # Build array reducers that automatically dump large array content
    # but check that the returned data structures are regular arrays to avoid
    # passing a memmap array pointing to a pool controlled temp folder that
    # might be confusing to the user

    # The MemmapingPool user can always return a numpy.memmap object explicitly
    # to avoid memory copy
    p = MemmapingPool(3, max_nbytes=10, temp_folder=tmpdir.strpath)
    try:
        res = p.apply_async(np.ones, args=(1000,))
        large = res.get()
        assert not has_shareable_memory(large)
        assert_array_equal(large, np.ones(1000))
    finally:
        p.terminate()
        del p
def test_memmapping_pool_for_large_arrays_in_return(factory, tmpdir):
    """Check that large arrays are not copied in memory in return"""
    assert_array_equal = np.testing.assert_array_equal

    # Build array reducers that automatically dump large array content
    # but check that the returned data structures are regular arrays to avoid
    # passing a memmap array pointing to a pool controlled temp folder that
    # might be confusing to the user

    # The MemmappingPool user can always return a numpy.memmap object
    # explicitly to avoid memory copy
    p = factory(3, max_nbytes=10, temp_folder=tmpdir.strpath)
    try:
        res = p.apply_async(np.ones, args=(1000,))
        large = res.get()
        assert not has_shareable_memory(large)
        assert_array_equal(large, np.ones(1000))
    finally:
        p.terminate()
        del p
def test_memmaping_pool_for_large_arrays_disabled():
    """Check that large arrays memmaping can be disabled"""
    # Set max_nbytes to None to disable the auto memmaping feature
    p = MemmapingPool(3, max_nbytes=None, temp_folder=TEMP_FOLDER)
    try:
        # Check that the tempfolder is empty
        assert_equal(os.listdir(TEMP_FOLDER), [])

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert_equal(large.nbytes, 800)
        p.map(check_array, [(large, i, 1.0) for i in range(large.shape[0])])

        # Check that the tempfolder is still empty
        assert_equal(os.listdir(TEMP_FOLDER), [])
    finally:
        # Cleanup open file descriptors
        p.terminate()
        del p
def test_memmaping_pool_for_large_arrays_disabled(tmpdir):
    """Check that large arrays memmaping can be disabled"""
    # Set max_nbytes to None to disable the auto memmaping feature
    p = MemmapingPool(3, max_nbytes=None, temp_folder=tmpdir.strpath)
    try:
        # Check that the tempfolder is empty
        assert os.listdir(tmpdir.strpath) == []

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert large.nbytes == 800
        p.map(check_array, [(large, i, 1.0) for i in range(large.shape[0])])

        # Check that the tempfolder is still empty
        assert os.listdir(tmpdir.strpath) == []
    finally:
        # Cleanup open file descriptors
        p.terminate()
        del p
def test_memmapping_pool_for_large_arrays_disabled(factory, tmpdir):
    """Check that large arrays memmapping can be disabled"""
    # Set max_nbytes to None to disable the auto memmapping feature
    p = factory(3, max_nbytes=None, temp_folder=tmpdir.strpath)
    try:
        # Check that the tempfolder is empty
        assert os.listdir(tmpdir.strpath) == []

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert large.nbytes == 800
        p.map(check_array, [(large, i, 1.0) for i in range(large.shape[0])])

        # Check that the tempfolder is still empty
        assert os.listdir(tmpdir.strpath) == []
    finally:
        # Cleanup open file descriptors
        p.terminate()
        del p
def test_memory_numpy_check_mmap_mode(tmpdir, monkeypatch):
    """Check that mmap_mode is respected even at the first call"""
    memory = Memory(location=tmpdir.strpath, mmap_mode='r', verbose=0)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    assert isinstance(c, np.memmap)
    assert c.mode == 'r'

    assert isinstance(b, np.memmap)
    assert b.mode == 'r'

    # Corrupt the file. Deleting the b and c memmaps first is necessary to be
    # able to edit the file.
    del b
    del c
    gc.collect()
    corrupt_single_cache_item(memory)

    # Make sure that corrupting the file causes recomputation and that
    # a warning is issued.
    recorded_warnings = monkeypatch_cached_func_warn(twice, monkeypatch)
    d = twice(a)
    assert len(recorded_warnings) == 1
    exception_msg = 'Exception while loading results'
    assert exception_msg in recorded_warnings[0]
    # Assert that the recomputation returns a memmap
    assert isinstance(d, np.memmap)
    assert d.mode == 'r'
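# The mmap_mode corruption tests rely on `corrupt_single_cache_item` and
# `monkeypatch_cached_func_warn`, which are defined elsewhere in the joblib
# test suite. The versions below are assumed sketches reconstructed from how
# the tests use them: overwrite the single cached output file with garbage,
# and capture calls to the cached function's `warn` method. They are not
# necessarily the exact upstream helpers.
def corrupt_single_cache_item(memory):
    # There is exactly one cached item at this point in the tests above.
    single_cache_item, = memory.store_backend.get_items()
    output_filename = os.path.join(single_cache_item.path, 'output.pkl')
    with open(output_filename, 'w') as f:
        f.write('garbage')


def monkeypatch_cached_func_warn(func, monkeypatch_fixture):
    # Record warning messages instead of letting them go through the
    # cached function's logging machinery.
    recorded = []

    def append_to_record(item):
        recorded.append(item)

    monkeypatch_fixture.setattr(func, 'warn', append_to_record)
    return recorded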
def test_joblib_compression_formats():
    compresslevels = (1, 3, 6)
    filename = env['filename'] + str(random.randint(0, 1000))
    objects = (np.ones(shape=(100, 100), dtype='f8'),
               range(10),
               {'a': 1, 2: 'b'}, [], (), {}, 0, 1.0)

    for compress in compresslevels:
        for cmethod in _COMPRESSORS:
            dump_filename = filename + "." + cmethod
            for obj in objects:
                if not PY3_OR_LATER and cmethod in ('xz', 'lzma'):
                    # Lzma module only available for python >= 3.3
                    msg = "{0} compression is only available".format(cmethod)
                    assert_raises_regex(NotImplementedError, msg,
                                        numpy_pickle.dump, obj, dump_filename,
                                        compress=(cmethod, compress))
                else:
                    numpy_pickle.dump(obj, dump_filename,
                                      compress=(cmethod, compress))
                    # Verify the file contains the right magic number
                    with open(dump_filename, 'rb') as f:
                        nose.tools.assert_equal(_detect_compressor(f), cmethod)

                    # Verify the reloaded object is correct
                    obj_reloaded = numpy_pickle.load(dump_filename)
                    nose.tools.assert_true(isinstance(obj_reloaded, type(obj)))
                    if isinstance(obj, np.ndarray):
                        np.testing.assert_array_equal(obj_reloaded, obj)
                    else:
                        nose.tools.assert_equal(obj_reloaded, obj)
                    os.remove(dump_filename)
def test_memory_numpy_check_mmap_mode(tmpdir, monkeypatch):
    """Check that mmap_mode is respected even at the first call"""
    memory = Memory(location=tmpdir.strpath, mmap_mode='r', verbose=0)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    assert isinstance(c, np.memmap)
    assert c.mode == 'r'

    assert isinstance(b, np.memmap)
    assert b.mode == 'r'

    # Corrupt the file. Deleting the b and c memmaps first is necessary to be
    # able to edit the file.
    del b
    del c
    corrupt_single_cache_item(memory)

    # Make sure that corrupting the file causes recomputation and that
    # a warning is issued.
    recorded_warnings = monkeypatch_cached_func_warn(twice, monkeypatch)
    d = twice(a)
    assert len(recorded_warnings) == 1
    exception_msg = 'Exception while loading results'
    assert exception_msg in recorded_warnings[0]
    # Assert that the recomputation returns a memmap
    assert isinstance(d, np.memmap)
    assert d.mode == 'r'
def test_joblib_compression_formats(tmpdir, compress, cmethod):
    filename = tmpdir.join('test.pkl').strpath
    objects = (np.ones(shape=(100, 100), dtype='f8'),
               range(10),
               {'a': 1, 2: 'b'}, [], (), {}, 0, 1.0)

    dump_filename = filename + "." + cmethod
    for obj in objects:
        if not PY3_OR_LATER and cmethod in ('lzma', 'xz', 'lz4'):
            # Lzma module only available for python >= 3.3
            msg = "{} compression is only available".format(cmethod)
            error = NotImplementedError
            if cmethod == 'lz4':
                error = ValueError
            with raises(error) as excinfo:
                numpy_pickle.dump(obj, dump_filename,
                                  compress=(cmethod, compress))
            excinfo.match(msg)
        elif cmethod == 'lz4' and with_lz4.args[0]:
            # Skip the test if lz4 is not installed. Here we use the with_lz4
            # skipif fixture whose argument is True when lz4 is not installed
            raise SkipTest("lz4 is not installed.")
        else:
            numpy_pickle.dump(obj, dump_filename,
                              compress=(cmethod, compress))
            # Verify the file contains the right magic number
            with open(dump_filename, 'rb') as f:
                assert _detect_compressor(f) == cmethod

            # Verify the reloaded object is correct
            obj_reloaded = numpy_pickle.load(dump_filename)
            assert isinstance(obj_reloaded, type(obj))
            if isinstance(obj, np.ndarray):
                np.testing.assert_array_equal(obj_reloaded, obj)
            else:
                assert obj_reloaded == obj
def test_memmap_returned_as_regular_array(backend):
    data = np.ones(int(1e3))
    # Check that child processes send temporary memmaps back as numpy arrays.
    [result] = Parallel(n_jobs=2, backend=backend, max_nbytes=100)(
        delayed(check_memmap_and_send_back)(data) for _ in range(1))
    assert _get_backing_memmap(result) is None
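# `check_memmap_and_send_back` is a helper from the joblib test suite that is
# not shown in this listing. A plausible minimal version, assumed from the
# test above, is to verify that the worker received a memmap-backed array and
# return it unchanged so the parent process can inspect what comes back:
def check_memmap_and_send_back(array):
    assert _get_backing_memmap(array) is not None
    return array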
def generate_arrays(n):
    for i in range(n):
        yield np.ones(10, dtype=np.float32) * i
def __init__(self):
    self.array_float = np.arange(100, dtype='float64')
    self.array_int = np.ones(100, dtype='int32')
    self.array_obj = np.array(['a', 10, 20.0], dtype='object')