Example #1
def test_pool_with_memmap_array_view(tmpdir):
    """Check that subprocess can access and update shared memory array"""
    assert_array_equal = np.testing.assert_array_equal

    # Fork the subprocess before allocating the objects to be passed
    pool_temp_folder = tmpdir.mkdir('pool').strpath
    p = MemmapingPool(10, max_nbytes=2, temp_folder=pool_temp_folder)
    try:

        filename = tmpdir.join('test.mmap').strpath
        a = np.memmap(filename, dtype=np.float32, shape=(3, 5), mode='w+')
        a.fill(1.0)

        # Create an ndarray view on the memmap instance
        a_view = np.asarray(a)
        assert not isinstance(a_view, np.memmap)
        assert has_shareable_memory(a_view)

        p.map(inplace_double, [(a_view, (i, j), 1.0)
                               for i in range(a.shape[0])
                               for j in range(a.shape[1])])

        # Both a and the a_view have been updated
        assert_array_equal(a, 2 * np.ones(a.shape))
        assert_array_equal(a_view, 2 * np.ones(a.shape))

        # Passing memmap array view to the pool should not trigger the
        # creation of new files on the FS
        assert os.listdir(pool_temp_folder) == []

    finally:
        p.terminate()
        del p
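
Several of these examples (this one and Example #5 below) map work onto an inplace_double helper that is not reproduced on this page. A minimal sketch of what it plausibly looks like, inferred from the (data, position, expected_value) tuples it receives, is shown below; it is an illustration, not the verbatim joblib test helper:

def inplace_double(args):
    # Worker function: check the expected value at the given position, then
    # double it in place so the change is visible through shared memory.
    data, position, expected = args
    assert data[position] == expected
    data[position] *= 2

Because the update happens in place, writing to a read-only memmap (mode='r') is expected to fail, which Example #5 relies on.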
Example #2
def test_memmaping_on_dev_shm():
    """Check that MemmapingPool uses /dev/shm when possible"""
    p = MemmapingPool(3, max_nbytes=10)
    try:
        # Check that the pool has correctly detected the presence of the
        # shared memory filesystem.
        pool_temp_folder = p._temp_folder
        folder_prefix = '/dev/shm/joblib_memmaping_pool_'
        assert_true(pool_temp_folder.startswith(folder_prefix))
        assert_true(os.path.exists(pool_temp_folder))

        # Try with a file larger than the memmap threshold of 10 bytes
        a = np.ones(100, dtype=np.float64)
        assert_equal(a.nbytes, 800)
        p.map(id, [a] * 10)
        # a should have been memmapped to the pool temp folder: the joblib
        # pickling procedure generates a .pkl and a .npy file:
        assert_equal(len(os.listdir(pool_temp_folder)), 2)

        b = np.ones(100, dtype=np.float64)
        assert_equal(b.nbytes, 800)
        p.map(id, [b] * 10)
        # Copies of both a and b are now stored in the shared memory folder
        assert_equal(len(os.listdir(pool_temp_folder)), 4)

    finally:
        # Cleanup open file descriptors
        p.terminate()
        del p

    # The temp folder is cleaned up upon pool termination
    assert_false(os.path.exists(pool_temp_folder))
Example #3
def test_memmaping_on_dev_shm():
    """Check that MemmapingPool uses /dev/shm when possible"""
    p = MemmapingPool(3, max_nbytes=10)
    try:
        # Check that the pool has correctly detected the presence of the
        # shared memory filesystem.
        pool_temp_folder = p._temp_folder
        folder_prefix = '/dev/shm/joblib_memmaping_pool_'
        assert pool_temp_folder.startswith(folder_prefix)
        assert os.path.exists(pool_temp_folder)

        # Try with a file larger than the memmap threshold of 10 bytes
        a = np.ones(100, dtype=np.float64)
        assert a.nbytes == 800
        p.map(id, [a] * 10)
        # a should have been memmapped to the pool temp folder: the joblib
        # pickling procedure generates one .pkl file:
        assert len(os.listdir(pool_temp_folder)) == 1

        # create a new array with content that is different from 'a' so that
        # it is mapped to a different file in the temporary folder of the
        # pool.
        b = np.ones(100, dtype=np.float64) * 2
        assert b.nbytes == 800
        p.map(id, [b] * 10)
        # Copies of both a and b are now stored in the shared memory folder
        assert len(os.listdir(pool_temp_folder)) == 2

    finally:
        # Cleanup open file descriptors
        p.terminate()
        del p

    # The temp folder is cleaned up upon pool termination
    assert not os.path.exists(pool_temp_folder)
Example #4
def test_weak_array_key_map():

    def assert_empty_after_gc_collect(container, retries=3):
        for i in range(retries):
            if len(container) == 0:
                return
            gc.collect()
        assert len(container) == 0

    a = np.ones(42)
    m = _WeakArrayKeyMap()
    m.set(a, 'a')
    assert m.get(a) == 'a'

    b = a
    assert m.get(b) == 'a'
    m.set(b, 'b')
    assert m.get(a) == 'b'

    del a
    gc.collect()
    assert len(m._data) == 1
    assert m.get(b) == 'b'

    del b
    assert_empty_after_gc_collect(m._data)

    c = np.ones(42)
    m.set(c, 'c')
    assert len(m._data) == 1
    assert m.get(c) == 'c'

    with raises(KeyError):
        m.get(np.ones(42))

    del c
    assert_empty_after_gc_collect(m._data)

    # Check that creating and dropping numpy arrays with potentially the same
    # object id will not cause the map to get confused.
    def get_set_get_collect(m, i):
        a = np.ones(42)
        with raises(KeyError):
            m.get(a)
        m.set(a, i)
        assert m.get(a) == i
        return id(a)

    unique_ids = set([get_set_get_collect(m, i) for i in range(1000)])
    if platform.python_implementation() == 'CPython':
        # On CPython (at least) the same id is often reused many times for the
        # temporary arrays created under the local scope of the
        # get_set_get_collect function without causing any spurious lookups /
        # insertions in the map.
        assert len(unique_ids) < 100
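
The _WeakArrayKeyMap exercised above maps numpy arrays to values without keeping the arrays alive and without being fooled by recycled object ids. A simplified sketch of that mechanism (an assumed illustration, not joblib's actual implementation) could look like this:

import weakref


class WeakArrayKeyMapSketch:
    # Illustrative stand-in for joblib's _WeakArrayKeyMap.

    def __init__(self):
        self._data = {}

    def get(self, obj):
        entry = self._data.get(id(obj))
        if entry is None or entry[0]() is not obj:
            # Unknown array, or an id recycled from an already collected array
            raise KeyError(obj)
        return entry[1]

    def set(self, obj, value):
        key = id(obj)
        entry = self._data.get(key)
        if entry is None or entry[0]() is not obj:
            # Register a weakref callback that drops the entry once the array
            # is garbage collected, so the map never keeps arrays alive.
            def _on_collect(_, key=key):
                self._data.pop(key, None)
            self._data[key] = (weakref.ref(obj, _on_collect), value)
        else:
            self._data[key] = (entry[0], value)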
Example #5
def test_pool_with_memmap(tmpdir_path):
    """Check that subprocess can access and update shared memory memmap"""
    assert_array_equal = np.testing.assert_array_equal

    # Fork the subprocess before allocating the objects to be passed
    pool_temp_folder = os.path.join(tmpdir_path, 'pool')
    os.makedirs(pool_temp_folder)
    p = MemmapingPool(10, max_nbytes=2, temp_folder=pool_temp_folder)
    try:
        filename = os.path.join(tmpdir_path, 'test.mmap')
        a = np.memmap(filename, dtype=np.float32, shape=(3, 5), mode='w+')
        a.fill(1.0)

        p.map(inplace_double, [(a, (i, j), 1.0)
                               for i in range(a.shape[0])
                               for j in range(a.shape[1])])

        assert_array_equal(a, 2 * np.ones(a.shape))

        # Open a copy-on-write view on the previous data
        b = np.memmap(filename, dtype=np.float32, shape=(5, 3), mode='c')

        p.map(inplace_double, [(b, (i, j), 2.0)
                               for i in range(b.shape[0])
                               for j in range(b.shape[1])])

        # Passing memmap instances to the pool should not trigger the creation
        # of new files on the FS
        assert os.listdir(pool_temp_folder) == []

        # the original data is untouched
        assert_array_equal(a, 2 * np.ones(a.shape))
        assert_array_equal(b, 2 * np.ones(b.shape))

        # readonly maps can be read but not updated
        c = np.memmap(filename, dtype=np.float32, shape=(10,), mode='r',
                      offset=5 * 4)

        assert_raises(AssertionError, p.map, check_array,
                      [(c, i, 3.0) for i in range(c.shape[0])])

        # depending on the version of numpy one can either get a RuntimeError
        # or a ValueError
        assert_raises((RuntimeError, ValueError), p.map, inplace_double,
                      [(c, i, 2.0) for i in range(c.shape[0])])
    finally:
        # Close all file handles held by the pool
        p.terminate()
        del p
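
This example also relies on a check_array helper (reused by the large-array tests below) that the page does not show. Judging from how it is called, a plausible sketch is a worker that only asserts the value found at a position, which is why mapping it with a wrong expected value above must raise an AssertionError:

def check_array(args):
    # Assumed reconstruction of the test helper: verify that the value at the
    # given position of the (possibly memmapped) array matches expectations.
    data, position, expected = args
    assert data[position] == expected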
Example #6
def test_numpy_persistence_bufferred_array_compression():
    big_array = np.ones((_IO_BUFFER_SIZE + 100), dtype=np.uint8)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(big_array, filename, compress=True)
    arr_reloaded = numpy_pickle.load(filename)

    np.testing.assert_array_equal(big_array, arr_reloaded)
Example #7
def test_joblib_compression_formats():
    compresslevels = (1, 3, 6)
    filename = env['filename'] + str(random.randint(0, 1000))
    objects = (np.ones(shape=(100, 100), dtype='f8'),
               range(10),
               {'a': 1, 2: 'b'}, [], (), {}, 0, 1.0)

    for compress in compresslevels:
        for cmethod in _COMPRESSORS:
            dump_filename = filename + "." + cmethod
            for obj in objects:
                if not PY3_OR_LATER and cmethod in ('xz', 'lzma'):
                    # The lzma module is only available for Python >= 3.3
                    msg = "{} compression is only available".format(cmethod)
                    assert_raises_regex(NotImplementedError, msg,
                                        numpy_pickle.dump, obj, dump_filename,
                                        compress=(cmethod, compress))
                else:
                    numpy_pickle.dump(obj, dump_filename,
                                      compress=(cmethod, compress))
                    # Verify the file contains the right magic number
                    with open(dump_filename, 'rb') as f:
                        assert _detect_compressor(f) == cmethod
                    # Verify the reloaded object is correct
                    obj_reloaded = numpy_pickle.load(dump_filename)
                    assert isinstance(obj_reloaded, type(obj))
                    if isinstance(obj, np.ndarray):
                        np.testing.assert_array_equal(obj_reloaded, obj)
                    else:
                        assert obj_reloaded == obj
                    os.remove(dump_filename)
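
The compression tests call _detect_compressor to recognise the format from the first bytes of the file. A simplified, assumption-labelled sketch of such magic-number detection (covering only a few formats; joblib's real helper handles more and uses its own constants) is:

def detect_compressor_sketch(fileobj):
    # Illustrative only: peek at the first bytes of an open binary file and
    # guess the compression format from well-known magic numbers.
    first_bytes = fileobj.read(8)
    fileobj.seek(0)
    if first_bytes.startswith(b'\x1f\x8b'):
        return 'gzip'
    if first_bytes.startswith(b'BZh'):
        return 'bz2'
    if first_bytes.startswith(b'\xfd7zXZ'):
        return 'xz'
    if first_bytes.startswith(b'\x5d\x00'):
        return 'lzma'
    return 'not-compressed'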
Example #8
 def get_set_get_collect(m, i):
     a = np.ones(42)
     with raises(KeyError):
         m.get(a)
     m.set(a, i)
     assert m.get(a) == i
     return id(a)
Example #9
def test_memmaping_pool_for_large_arrays():
    """Check that large arrays are not copied in memory"""
    assert_array_equal = np.testing.assert_array_equal

    # Check that the tempfolder is empty
    assert_equal(os.listdir(TEMP_FOLDER), [])

    # Build array reducers that automatically dump large array content
    # to filesystem-backed memmap instances to avoid memory explosion
    p = MemmapingPool(3, max_nbytes=40, temp_folder=TEMP_FOLDER)
    try:
        # The temporary folder for the pool is not provisioned in advance
        assert_equal(os.listdir(TEMP_FOLDER), [])
        assert_false(os.path.exists(p._temp_folder))

        small = np.ones(5, dtype=np.float32)
        assert_equal(small.nbytes, 20)
        p.map(check_array, [(small, i, 1.0) for i in range(small.shape[0])])

        # Memory has been copied, the pool filesystem folder is unused
        assert_equal(os.listdir(TEMP_FOLDER), [])

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert_equal(large.nbytes, 800)
        p.map(check_array, [(large, i, 1.0) for i in range(large.shape[0])])

        # The data has been dumped in a temp folder for subprocess to share it
        # without per-child memory copies
        assert_true(os.path.isdir(p._temp_folder))
        dumped_filenames = os.listdir(p._temp_folder)
        assert_equal(len(dumped_filenames), 2)

        # Check that memory mapping is not triggered for arrays with
        # dtype='object'
        objects = np.array(['abc'] * 100, dtype='object')
        results = p.map(has_shareable_memory, [objects])
        assert_false(results[0])

    finally:
        # check FS garbage upon pool termination
        p.terminate()
        assert_false(os.path.exists(p._temp_folder))
        del p
Example #10
def test_memmapping_pool_for_large_arrays(factory, tmpdir):
    """Check that large arrays are not copied in memory"""

    # Check that the tempfolder is empty
    assert os.listdir(tmpdir.strpath) == []

    # Build array reducers that automatically dump large array content
    # to filesystem-backed memmap instances to avoid memory explosion
    p = factory(3, max_nbytes=40, temp_folder=tmpdir.strpath, verbose=2)
    try:
        # The temporary folder for the pool is not provisioned in advance
        assert os.listdir(tmpdir.strpath) == []
        assert not os.path.exists(p._temp_folder)

        small = np.ones(5, dtype=np.float32)
        assert small.nbytes == 20
        p.map(check_array, [(small, i, 1.0) for i in range(small.shape[0])])

        # Memory has been copied, the pool filesystem folder is unused
        assert os.listdir(tmpdir.strpath) == []

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert large.nbytes == 800
        p.map(check_array, [(large, i, 1.0) for i in range(large.shape[0])])

        # The data has been dumped in a temp folder for subprocess to share it
        # without per-child memory copies
        assert os.path.isdir(p._temp_folder)
        dumped_filenames = os.listdir(p._temp_folder)
        assert len(dumped_filenames) == 1

        # Check that memory mapping is not triggered for arrays with
        # dtype='object'
        objects = np.array(['abc'] * 100, dtype='object')
        results = p.map(has_shareable_memory, [objects])
        assert not results[0]

    finally:
        # check FS garbage upon pool termination
        p.terminate()
        assert not os.path.exists(p._temp_folder)
        del p
Example #11
def test_memmapping_on_large_enough_dev_shm(factory):
    """Check that memmapping uses /dev/shm when possible"""
    orig_size = jmr.SYSTEM_SHARED_MEM_FS_MIN_SIZE
    try:
        # Make joblib believe that it can use /dev/shm even when running on a
        # CI container where the size of the /dev/shm is not very large (that
        # is at least 32 MB instead of 2 GB by default).
        jmr.SYSTEM_SHARED_MEM_FS_MIN_SIZE = int(32e6)
        p = factory(3, max_nbytes=10)
        try:
            # Check that the pool has correctly detected the presence of the
            # shared memory filesystem.
            pool_temp_folder = p._temp_folder
            folder_prefix = '/dev/shm/joblib_memmapping_folder_'
            assert pool_temp_folder.startswith(folder_prefix)
            assert os.path.exists(pool_temp_folder)

            # Try with a file larger than the memmap threshold of 10 bytes
            a = np.ones(100, dtype=np.float64)
            assert a.nbytes == 800
            p.map(id, [a] * 10)
            # a should have been memmapped to the pool temp folder: the joblib
            # pickling procedure generates one .pkl file:
            assert len(os.listdir(pool_temp_folder)) == 1

            # create a new array with content that is different from 'a' so
            # that it is mapped to a different file in the temporary folder of
            # the pool.
            b = np.ones(100, dtype=np.float64) * 2
            assert b.nbytes == 800
            p.map(id, [b] * 10)
            # Copies of both a and b are now stored in the shared memory folder
            assert len(os.listdir(pool_temp_folder)) == 2
        finally:
            # Cleanup open file descriptors
            p.terminate()
            del p
        # The temp folder is cleaned up upon pool termination
        assert not os.path.exists(pool_temp_folder)
    finally:
        jmr.SYSTEM_SHARED_MEM_FS_MIN_SIZE = orig_size
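
The test above lowers jmr.SYSTEM_SHARED_MEM_FS_MIN_SIZE so that joblib still accepts the small /dev/shm found in CI containers. The check it influences can be sketched as follows (a hypothetical illustration of the behaviour the test relies on, not joblib's code): /dev/shm is only selected when it exists, is writable and reports at least the minimum size.

import os
import shutil

SHARED_MEM_FS = '/dev/shm'
SHARED_MEM_FS_MIN_SIZE = int(2e9)  # assumed default threshold (~2 GB)


def shared_mem_fs_is_usable():
    # Use the shared memory filesystem only if it is present, writable and
    # large enough; otherwise fall back to a regular temporary folder.
    if not os.path.isdir(SHARED_MEM_FS):
        return False
    if not os.access(SHARED_MEM_FS, os.W_OK):
        return False
    return shutil.disk_usage(SHARED_MEM_FS).total >= SHARED_MEM_FS_MIN_SIZE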
Example #12
def test_non_contiguous_array_pickling(tmpdir):
    filename = tmpdir.join('test.pkl').strpath

    for array in [  # Array that triggers a contiguousness issue with nditer,
            # see https://github.com/joblib/joblib/pull/352 and see
            # https://github.com/joblib/joblib/pull/353
            np.asfortranarray([[1, 2], [3, 4]])[1:],
            # Non-contiguous array that works fine with nditer
            np.ones((10, 50, 20), order='F')[:, :1, :]
    ]:
        assert not array.flags.c_contiguous
        assert not array.flags.f_contiguous
        numpy_pickle.dump(array, filename)
        array_reloaded = numpy_pickle.load(filename)
        np.testing.assert_array_equal(array_reloaded, array)
Example #13
def test_memory_usage():
    # Verify memory stays within expected bounds.
    filename = env['filename']
    small_array = np.ones((10, 10))
    big_array = np.ones(shape=100 * int(1e6), dtype=np.uint8)
    small_matrix = np.matrix(small_array)
    big_matrix = np.matrix(big_array)
    for compress in (True, False):
        for obj in (small_array, big_array, small_matrix, big_matrix):
            size = obj.nbytes / 1e6
            obj_filename = filename + str(np.random.randint(0, 1000))
            mem_used = memory_used(numpy_pickle.dump,
                                   obj, obj_filename, compress=compress)

            # The memory used to dump the object shouldn't exceed the buffer
            # size used to write array chunks (16MB).
            write_buf_size = _IO_BUFFER_SIZE + 16 * 1024 ** 2 / 1e6
            assert mem_used <= write_buf_size

            mem_used = memory_used(numpy_pickle.load, obj_filename)
            # memory used should be less than array size + buffer size used to
            # read the array chunk by chunk.
            read_buf_size = 32 + _IO_BUFFER_SIZE  # MiB
            assert mem_used < size + read_buf_size
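
test_memory_usage depends on a memory_used helper that is not shown on this page. A plausible sketch, assuming the memory_profiler package, samples resident memory while the function runs and reports the increase in MB (the helper actually used by the test suite may differ):

import gc

from memory_profiler import memory_usage


def memory_used(func, *args, **kwargs):
    # Return the extra memory (in MB) observed while running func.
    gc.collect()
    samples = memory_usage((func, args, kwargs), interval=0.001)
    return max(samples) - min(samples)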
Example #14
def test_non_contiguous_array_pickling():
    filename = env['filename'] + str(random.randint(0, 1000))

    for array in [  # Array that triggers a contiguousness issue with nditer,
                    # see https://github.com/joblib/joblib/pull/352 and see
                    # https://github.com/joblib/joblib/pull/353
                    np.asfortranarray([[1, 2], [3, 4]])[1:],
                    # Non-contiguous array that works fine with nditer
                    np.ones((10, 50, 20), order='F')[:, :1, :]]:
        assert not array.flags.c_contiguous
        assert not array.flags.f_contiguous
        numpy_pickle.dump(array, filename)
        array_reloaded = numpy_pickle.load(filename)
        np.testing.assert_array_equal(array_reloaded, array)
        os.remove(filename)
Example #15
def test_memory_usage(tmpdir, compress):
    # Verify memory stays within expected bounds.
    filename = tmpdir.join('test.pkl').strpath
    small_array = np.ones((10, 10))
    big_array = np.ones(shape=100 * int(1e6), dtype=np.uint8)
    small_matrix = np.matrix(small_array)
    big_matrix = np.matrix(big_array)

    for obj in (small_array, big_array, small_matrix, big_matrix):
        size = obj.nbytes / 1e6
        obj_filename = filename + str(np.random.randint(0, 1000))
        mem_used = memory_used(numpy_pickle.dump,
                               obj, obj_filename, compress=compress)

        # The memory used to dump the object shouldn't exceed the buffer
        # size used to write array chunks (16MB).
        write_buf_size = _IO_BUFFER_SIZE + 16 * 1024 ** 2 / 1e6
        assert mem_used <= write_buf_size

        mem_used = memory_used(numpy_pickle.load, obj_filename)
        # memory used should be less than array size + buffer size used to
        # read the array chunk by chunk.
        read_buf_size = 32 + _IO_BUFFER_SIZE  # MiB
        assert mem_used < size + read_buf_size
Example #16
def test_non_contiguous_array_pickling():
    filename = env['filename'] + str(random.randint(0, 1000))

    for array in [  # Array that triggers a contiguousness issue with nditer,
            # see https://github.com/joblib/joblib/pull/352 and see
            # https://github.com/joblib/joblib/pull/353
            np.asfortranarray([[1, 2], [3, 4]])[1:],
            # Non-contiguous array that works fine with nditer
            np.ones((10, 50, 20), order='F')[:, :1, :]
    ]:
        nose.tools.assert_false(array.flags.c_contiguous)
        nose.tools.assert_false(array.flags.f_contiguous)
        numpy_pickle.dump(array, filename)
        array_reloaded = numpy_pickle.load(filename)
        np.testing.assert_array_equal(array_reloaded, array)
        os.remove(filename)
Example #17
def test_memory_numpy_check_mmap_mode(tmpdir):
    """Check that mmap_mode is respected even at the first call"""

    memory = Memory(location=tmpdir.strpath, mmap_mode='r', verbose=0)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    assert isinstance(c, np.memmap)
    assert c.mode == 'r'

    assert isinstance(b, np.memmap)
    assert b.mode == 'r'
Example #18
def test_nested_parallelism_with_dask():
    distributed = pytest.importorskip('distributed')
    client = distributed.Client(n_workers=2, threads_per_worker=2)  # noqa

    # 10 MB of data as argument to trigger implicit scattering
    data = np.ones(int(1e7), dtype=np.uint8)
    for i in range(2):
        with parallel_backend('dask'):
            backend_types_and_levels = _recursive_backend_info(data=data)
        assert len(backend_types_and_levels) == 4
        assert all(name == 'DaskDistributedBackend'
                   for name, _ in backend_types_and_levels)

    # No argument
    with parallel_backend('dask'):
        backend_types_and_levels = _recursive_backend_info()
    assert len(backend_types_and_levels) == 4
    assert all(name == 'DaskDistributedBackend'
               for name, _ in backend_types_and_levels)
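
_recursive_backend_info is not listed on this page. Given that the test expects four (backend name, nesting level) pairs, a plausible sketch records the active backend at each nesting level and recurses through one nested Parallel call (an assumed reconstruction, not the exact joblib test helper):

from joblib import Parallel, delayed


def recursive_backend_info(limit=3, data=None):
    # Record the active backend at this nesting level, then recurse once
    # through a nested Parallel call until the limit reaches zero.
    with Parallel(n_jobs=2) as parallel:
        backend = parallel._backend  # private attribute, used for introspection
        this_level = [(type(backend).__name__, backend.nesting_level)]
        if limit == 0:
            return this_level
        sub_info, = parallel(
            delayed(recursive_backend_info)(limit=limit - 1, data=data)
            for _ in range(1))
        return this_level + sub_info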
Example #19
def test_memory_numpy_check_mmap_mode(tmpdir):
    """Check that mmap_mode is respected even at the first call"""

    memory = Memory(cachedir=tmpdir.strpath, mmap_mode='r', verbose=0)
    memory.clear(warn=False)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    assert isinstance(c, np.memmap)
    assert c.mode == 'r'

    assert isinstance(b, np.memmap)
    assert b.mode == 'r'
Example #20
def test_memory_numpy_check_mmap_mode():
    """Check that mmap_mode is respected even at the first call"""

    memory = Memory(cachedir=env["dir"], mmap_mode="r", verbose=0)
    memory.clear(warn=False)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    nose.tools.assert_true(isinstance(c, np.memmap))
    nose.tools.assert_equal(c.mode, "r")

    nose.tools.assert_true(isinstance(b, np.memmap))
    nose.tools.assert_equal(b.mode, "r")
Example #21
def test_memory_numpy_check_mmap_mode():
    """Check that mmap_mode is respected even at the first call"""

    memory = Memory(cachedir=env['dir'], mmap_mode='r', verbose=0)
    memory.clear(warn=False)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    nose.tools.assert_true(isinstance(c, np.memmap))
    nose.tools.assert_equal(c.mode, 'r')

    nose.tools.assert_true(isinstance(b, np.memmap))
    nose.tools.assert_equal(b.mode, 'r')
Example #22
def test_memmaping_pool_for_large_arrays_in_return(tmpdir):
    """Check that large arrays are not copied in memory in return"""
    assert_array_equal = np.testing.assert_array_equal

    # Build array reducers that automatically dump large array content,
    # but check that the returned data structures are regular arrays to avoid
    # passing a memmap array pointing to a pool-controlled temp folder that
    # might be confusing to the user

    # The MemmapingPool user can always return a numpy.memmap object
    # explicitly to avoid a memory copy
    p = MemmapingPool(3, max_nbytes=10, temp_folder=tmpdir.strpath)
    try:
        res = p.apply_async(np.ones, args=(1000,))
        large = res.get()
        assert not has_shareable_memory(large)
        assert_array_equal(large, np.ones(1000))
    finally:
        p.terminate()
        del p
Example #23
def test_memmapping_pool_for_large_arrays_in_return(factory, tmpdir):
    """Check that large arrays are not copied in memory in return"""
    assert_array_equal = np.testing.assert_array_equal

    # Build array reducers that automatically dump large array content,
    # but check that the returned data structures are regular arrays to avoid
    # passing a memmap array pointing to a pool-controlled temp folder that
    # might be confusing to the user

    # The MemmappingPool user can always return a numpy.memmap object
    # explicitly to avoid a memory copy
    p = factory(3, max_nbytes=10, temp_folder=tmpdir.strpath)
    try:
        res = p.apply_async(np.ones, args=(1000, ))
        large = res.get()
        assert not has_shareable_memory(large)
        assert_array_equal(large, np.ones(1000))
    finally:
        p.terminate()
        del p
Example #24
def test_memmaping_pool_for_large_arrays_disabled():
    """Check that large arrays memmaping can be disabled"""
    # Set max_nbytes to None to disable the auto memmaping feature
    p = MemmapingPool(3, max_nbytes=None, temp_folder=TEMP_FOLDER)
    try:

        # Check that the tempfolder is empty
        assert_equal(os.listdir(TEMP_FOLDER), [])

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert_equal(large.nbytes, 800)
        p.map(check_array, [(large, i, 1.0) for i in range(large.shape[0])])

        # Check that the tempfolder is still empty
        assert_equal(os.listdir(TEMP_FOLDER), [])

    finally:
        # Cleanup open file descriptors
        p.terminate()
        del p
Example #25
def test_memmaping_pool_for_large_arrays_disabled(tmpdir):
    """Check that large arrays memmaping can be disabled"""
    # Set max_nbytes to None to disable the auto memmaping feature
    p = MemmapingPool(3, max_nbytes=None, temp_folder=tmpdir.strpath)
    try:

        # Check that the tempfolder is empty
        assert os.listdir(tmpdir.strpath) == []

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert large.nbytes == 800
        p.map(check_array, [(large, i, 1.0) for i in range(large.shape[0])])

        # Check that the tempfolder is still empty
        assert os.listdir(tmpdir.strpath) == []

    finally:
        # Cleanup open file descriptors
        p.terminate()
        del p
Example #26
def test_memmapping_pool_for_large_arrays_disabled(factory, tmpdir):
    """Check that large arrays memmapping can be disabled"""
    # Set max_nbytes to None to disable the auto memmapping feature
    p = factory(3, max_nbytes=None, temp_folder=tmpdir.strpath)
    try:

        # Check that the tempfolder is empty
        assert os.listdir(tmpdir.strpath) == []

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert large.nbytes == 800
        p.map(check_array, [(large, i, 1.0) for i in range(large.shape[0])])

        # Check that the tempfolder is still empty
        assert os.listdir(tmpdir.strpath) == []

    finally:
        # Cleanup open file descriptors
        p.terminate()
        del p
Example #27
def test_memory_numpy_check_mmap_mode(tmpdir, monkeypatch):
    """Check that mmap_mode is respected even at the first call"""

    memory = Memory(location=tmpdir.strpath, mmap_mode='r', verbose=0)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    assert isinstance(c, np.memmap)
    assert c.mode == 'r'

    assert isinstance(b, np.memmap)
    assert b.mode == 'r'

    # Corrupt the file. Deleting the b and c mmaps
    # is necessary to be able to edit the file
    del b
    del c
    gc.collect()
    corrupt_single_cache_item(memory)

    # Make sure that corrupting the file causes recomputation and that
    # a warning is issued.
    recorded_warnings = monkeypatch_cached_func_warn(twice, monkeypatch)
    d = twice(a)
    assert len(recorded_warnings) == 1
    exception_msg = 'Exception while loading results'
    assert exception_msg in recorded_warnings[0]
    # Asserts that the recomputation returns a mmap
    assert isinstance(d, np.memmap)
    assert d.mode == 'r'
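
This test leans on two helpers that are not reproduced here, corrupt_single_cache_item and monkeypatch_cached_func_warn. Plausible sketches of their shape (assumptions, not the verbatim joblib test helpers) are: the first overwrites the single cached output.pkl with garbage, the second swaps the cached function's warn method for a recorder so the emitted warnings can be inspected.

import os


def corrupt_single_cache_item(memory):
    # Assumes exactly one item is stored in the cache backend; overwrite its
    # pickled output so the next load fails and triggers recomputation.
    single_cache_item, = memory.store_backend.get_items()
    output_filename = os.path.join(single_cache_item.path, 'output.pkl')
    with open(output_filename, 'w') as f:
        f.write('garbage')


def monkeypatch_cached_func_warn(cached_func, monkeypatch_fixture):
    # Record the messages passed to the cached function's warn method instead
    # of letting them go through the logging machinery.
    recorded = []
    monkeypatch_fixture.setattr(cached_func, 'warn', recorded.append)
    return recorded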
Example #28
def test_joblib_compression_formats():
    compresslevels = (1, 3, 6)
    filename = env['filename'] + str(random.randint(0, 1000))
    objects = (np.ones(shape=(100, 100), dtype='f8'), range(10), {
        'a': 1,
        2: 'b'
    }, [], (), {}, 0, 1.0)

    for compress in compresslevels:
        for cmethod in _COMPRESSORS:
            dump_filename = filename + "." + cmethod
            for obj in objects:
                if not PY3_OR_LATER and cmethod in ('xz', 'lzma'):
                    # The lzma module is only available for Python >= 3.3
                    msg = "{0} compression is only available".format(cmethod)
                    assert_raises_regex(NotImplementedError,
                                        msg,
                                        numpy_pickle.dump,
                                        obj,
                                        dump_filename,
                                        compress=(cmethod, compress))
                else:
                    numpy_pickle.dump(obj,
                                      dump_filename,
                                      compress=(cmethod, compress))
                    # Verify the file contains the right magic number
                    with open(dump_filename, 'rb') as f:
                        nose.tools.assert_equal(_detect_compressor(f), cmethod)
                    # Verify the reloaded object is correct
                    obj_reloaded = numpy_pickle.load(dump_filename)
                    nose.tools.assert_true(isinstance(obj_reloaded, type(obj)))
                    if isinstance(obj, np.ndarray):
                        np.testing.assert_array_equal(obj_reloaded, obj)
                    else:
                        nose.tools.assert_equal(obj_reloaded, obj)
                    os.remove(dump_filename)
Example #29
def test_memory_numpy_check_mmap_mode(tmpdir, monkeypatch):
    """Check that mmap_mode is respected even at the first call"""

    memory = Memory(location=tmpdir.strpath, mmap_mode='r', verbose=0)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    assert isinstance(c, np.memmap)
    assert c.mode == 'r'

    assert isinstance(b, np.memmap)
    assert b.mode == 'r'

    # Corrupt the file. Deleting the b and c mmaps
    # is necessary to be able to edit the file
    del b
    del c
    corrupt_single_cache_item(memory)

    # Make sure that corrupting the file causes recomputation and that
    # a warning is issued.
    recorded_warnings = monkeypatch_cached_func_warn(twice, monkeypatch)
    d = twice(a)
    assert len(recorded_warnings) == 1
    exception_msg = 'Exception while loading results'
    assert exception_msg in recorded_warnings[0]
    # Asserts that the recomputation returns a mmap
    assert isinstance(d, np.memmap)
    assert d.mode == 'r'
Example #30
def test_joblib_compression_formats(tmpdir, compress, cmethod):
    filename = tmpdir.join('test.pkl').strpath
    objects = (np.ones(shape=(100, 100), dtype='f8'), range(10), {
        'a': 1,
        2: 'b'
    }, [], (), {}, 0, 1.0)

    dump_filename = filename + "." + cmethod
    for obj in objects:
        if not PY3_OR_LATER and cmethod in ('lzma', 'xz', 'lz4'):
            # The lzma module is only available for Python >= 3.3
            msg = "{} compression is only available".format(cmethod)
            error = NotImplementedError
            if cmethod == 'lz4':
                error = ValueError
            with raises(error) as excinfo:
                numpy_pickle.dump(obj,
                                  dump_filename,
                                  compress=(cmethod, compress))
            excinfo.match(msg)
        elif cmethod == 'lz4' and with_lz4.args[0]:
            # Skip the test if lz4 is not installed. Here we use the with_lz4
            # skipif fixture, whose argument is True when lz4 is not installed
            raise SkipTest("lz4 is not installed.")
        else:
            numpy_pickle.dump(obj, dump_filename, compress=(cmethod, compress))
            # Verify the file contains the right magic number
            with open(dump_filename, 'rb') as f:
                assert _detect_compressor(f) == cmethod
            # Verify the reloaded object is correct
            obj_reloaded = numpy_pickle.load(dump_filename)
            assert isinstance(obj_reloaded, type(obj))
            if isinstance(obj, np.ndarray):
                np.testing.assert_array_equal(obj_reloaded, obj)
            else:
                assert obj_reloaded == obj
Example #31
def test_memmap_returned_as_regular_array(backend):
    data = np.ones(int(1e3))
    # Check that child processes send temporary memmaps back as numpy arrays.
    [result] = Parallel(n_jobs=2, backend=backend, max_nbytes=100)(
        delayed(check_memmap_and_send_back)(data) for _ in range(1))
    assert _get_backing_memmap(result) is None
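
check_memmap_and_send_back is another helper this page omits. Given the assertion on _get_backing_memmap, a plausible sketch is a worker that verifies it received a memmap-backed array and returns it unchanged, so the test can confirm the parent gets a plain ndarray back (the import location of the private helper is assumed here):

from joblib._memmapping_reducer import _get_backing_memmap


def check_memmap_and_send_back(array):
    # Inside the worker the automatically dumped argument should be backed by
    # a memmap; returning it lets the parent check what it receives back.
    assert _get_backing_memmap(array) is not None
    return array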
Example #32
 def generate_arrays(n):
     for i in range(n):
         yield np.ones(10, dtype=np.float32) * i
Example #33
 def __init__(self):
     self.array_float = np.arange(100, dtype='float64')
     self.array_int = np.ones(100, dtype='int32')
     self.array_obj = np.array(['a', 10, 20.0], dtype='object')
Example #34
 def __init__(self):
     self.array_float = np.arange(100, dtype='float64')
     self.array_int = np.ones(100, dtype='int32')
     self.array_obj = np.array(['a', 10, 20.0], dtype='object')
Example #35
 def generate_arrays(n):
     for i in range(n):
         yield np.ones(10, dtype=np.float32) * i