def test_memory_usage(tmpdir, compress):
    """Check that dumping and loading arrays stays within memory bounds.

    ``tmpdir`` provides the directory in which the pickle files are written
    (presumably pytest's ``tmpdir`` fixture) and ``compress`` is forwarded
    unchanged to ``numpy_pickle.dump``.
    """
    filename = tmpdir.join('test.pkl').strpath
    small_array = np.ones((10, 10))
    big_array = np.ones(shape=100 * int(1e6), dtype=np.uint8)
    small_matrix = np.matrix(small_array)
    big_matrix = np.matrix(big_array)

    for obj in (small_array, big_array, small_matrix, big_matrix):
        size = obj.nbytes / 1e6
        # A random suffix gives every object its own file.
        obj_filename = filename + str(np.random.randint(0, 1000))
        mem_used = memory_used(numpy_pickle.dump,
                               obj, obj_filename, compress=compress)

        # The memory used to dump the object shouldn't exceed the buffer
        # size used to write array chunks (16MB).
        write_buf_size = _IO_BUFFER_SIZE + 16 * 1024**2 / 1e6
        assert mem_used <= write_buf_size

        mem_used = memory_used(numpy_pickle.load, obj_filename)
        # memory used should be less than array size + buffer size used to
        # read the array chunk by chunk.
        read_buf_size = 32 + _IO_BUFFER_SIZE  # MiB
        assert mem_used < size + read_buf_size
def test_compressed_pickle_dump_and_load(tmpdir):
    """Round-trip a heterogeneous list through a compressed pickle.

    The list mixes little- and big-endian arrays, an object array, raw
    bytes, matrices and a unicode string. It is dumped with ``compress=1``
    and every reloaded item is compared with the original.
    """
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('>i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.arange(5, dtype=np.dtype('>f8')),
        np.array([1, 'abc', {
            'a': 1,
            'b': 2
        }], dtype='O'),
        np.arange(256, dtype=np.uint8).tobytes(),
        # np.matrix is a subclass of np.ndarray, here we want
        # to verify this type of object is correctly unpickled
        # among versions.
        np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
        np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
        u"C'est l'\xe9t\xe9 !"
    ]

    fname = tmpdir.join('temp.pkl.gz').strpath

    dumped_filenames = numpy_pickle.dump(expected_list, fname, compress=1)
    assert len(dumped_filenames) == 1
    result_list = numpy_pickle.load(fname)
    for result, expected in zip(result_list, expected_list):
        if isinstance(expected, np.ndarray):
            # Arrays need an element-wise comparison plus a dtype check so
            # that the byte order is verified as well.
            assert result.dtype == expected.dtype
            np.testing.assert_equal(result, expected)
        else:
            # Plain ``==`` is only meaningful for non-array items.
            assert result == expected
def test_joblib_pickle_across_python_versions():
    """Check every reference pickle stored next to the ``data`` module."""
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tobytes returns the raw bytes of the array; it
                     # replaces the deprecated .tostring alias.
                     np.arange(256, dtype=np.uint8).tobytes(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the compressed and non compressed
    # pickles in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    pickle_extensions = ('.pkl', '.gz', '.gzip', '.bz2', '.xz', '.lzma', 'lz4')
    # str.endswith accepts a tuple, so one call covers every extension.
    pickle_filenames = [os.path.join(test_data_dir, fn)
                        for fn in os.listdir(test_data_dir)
                        if fn.endswith(pickle_extensions)]

    for fname in pickle_filenames:
        _check_pickle(fname, expected_list)
def test_file_handle_persistence():
    """Dump to and load from open compressed file handles.

    Each object is written through a compressor file object, then read back
    twice: once through the same compressor class and once through a raw
    binary handle.
    """
    objs = [np.random.random((10, 10)),
            "some data",
            np.matrix([0, 1, 2])]
    fobjs = [bz2.BZ2File, gzip.GzipFile]
    if PY3_OR_LATER:
        import lzma
        fobjs += [lzma.LZMAFile]
    filename = env['filename'] + str(random.randint(0, 1000))

    for obj in objs:
        for fobj in fobjs:
            with fobj(filename, 'wb') as f:
                numpy_pickle.dump(obj, f)

            # using the same decompressor prevents from internally
            # decompress again.
            with fobj(filename, 'rb') as f:
                obj_reloaded = numpy_pickle.load(f)

            # when needed, the correct decompressor should be used when
            # passing a raw file handle.
            with open(filename, 'rb') as f:
                obj_reloaded_2 = numpy_pickle.load(f)

            if isinstance(obj, np.ndarray):
                np.testing.assert_array_equal(obj_reloaded, obj)
                np.testing.assert_array_equal(obj_reloaded_2, obj)
            else:
                # ``==`` on arrays is element-wise, so a bare assert is only
                # valid for the non-array objects.
                assert obj_reloaded == obj
                assert obj_reloaded_2 == obj

def test_file_handle_persistence(tmpdir):
    """Dump to and load from open compressed file handles.

    Each object is written through a compressor file object, then read back
    twice: once through the same compressor class and once through a raw
    binary handle. ``tmpdir`` provides the directory holding the file.
    """
    objs = [np.random.random((10, 10)), "some data", np.matrix([0, 1, 2])]
    fobjs = [bz2.BZ2File, gzip.GzipFile]
    if lzma is not None:
        fobjs += [lzma.LZMAFile]
    filename = tmpdir.join('test.pkl').strpath

    for obj in objs:
        for fobj in fobjs:
            with fobj(filename, 'wb') as f:
                numpy_pickle.dump(obj, f)

            # using the same decompressor prevents from internally
            # decompress again.
            with fobj(filename, 'rb') as f:
                obj_reloaded = numpy_pickle.load(f)

            # when needed, the correct decompressor should be used when
            # passing a raw file handle.
            with open(filename, 'rb') as f:
                obj_reloaded_2 = numpy_pickle.load(f)

            if isinstance(obj, np.ndarray):
                np.testing.assert_array_equal(obj_reloaded, obj)
                np.testing.assert_array_equal(obj_reloaded_2, obj)
            else:
                # ``==`` on arrays is element-wise, so a bare assert is only
                # valid for the non-array objects.
                assert obj_reloaded == obj
                assert obj_reloaded_2 == obj
def test_joblib_decompression_format_support():
    """Check the compressed reference files found next to ``data``."""
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.array([1, 'abc', {
            'a': 1,
            'b': 2
        }], dtype='O'),
        # .tobytes returns the raw bytes of the array; it replaces the
        # deprecated .tostring alias.
        np.arange(256, dtype=np.uint8).tobytes(),
        # np.matrix is a subclass of np.ndarray, here we want
        # to verify this type of object is correctly unpickled
        # among versions.
        np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
        u"C'est l'\xe9t\xe9 !"
    ]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    extensions = ('.gz', '.gzip', '.bz2', '.xz', '.lzma')
    # NOTE(review): the pattern below has no wildcard, so glob only matches
    # a file literally named like the extension (e.g. ".gz"). The loop at
    # the end therefore probably never runs; '*' + ext looks like the
    # intent -- confirm before changing, as it would enable new checks.
    compress_filenames_list = [
        glob.glob(os.path.join(test_data_dir, ext)) for ext in extensions
    ]
    # Flatten the per-extension lists into a single list of paths.
    compress_filenames = sum(compress_filenames_list, [])

    for fname in compress_filenames:
        _check_compression_format(fname, expected_list)
def test_numpy_persistence():
    """Round-trip arrays, array subclasses and a composite object.

    Every case is repeated for each value of ``compress`` and checks that
    the dump produces a single file and that the reloaded values are equal
    to the originals.
    """
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress in (False, True, 0, 3):
        # We use 'a.T' to have a non C-contiguous array.
        for obj in ((a,), (a.T,), (a, a), [a, a, a]):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))

            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)

            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)
            # Check that only one file was created
            nose.tools.assert_equal(filenames[0], this_filename)
            # Check that this file does exist
            nose.tools.assert_true(
                os.path.exists(os.path.join(env['dir'], filenames[0])))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            np.testing.assert_array_equal(np.array(obj), np.array(obj_))

        # Now test with array subclasses
        # ``float`` is what the removed ``np.float`` alias pointed to.
        for obj in (np.matrix(np.zeros(10)),
                    np.memmap(filename + str(random.randint(0, 1000)) + 'mmap',
                              mode='w+', shape=4, dtype=float)):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)
            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)

            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap and
                    hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

            np.testing.assert_array_equal(obj_, obj)

        # Test with an object containing multiple numpy arrays
        # (this reuses the last file name picked in the loop above).
        obj = ComplexTestObject()
        filenames = numpy_pickle.dump(obj, this_filename,
                                      compress=compress)
        # All is cached in one file
        nose.tools.assert_equal(len(filenames), 1)

        obj_loaded = numpy_pickle.load(this_filename)
        nose.tools.assert_true(isinstance(obj_loaded, type(obj)))
        np.testing.assert_array_equal(obj_loaded.array_float, obj.array_float)
        np.testing.assert_array_equal(obj_loaded.array_int, obj.array_int)
        np.testing.assert_array_equal(obj_loaded.array_obj, obj.array_obj)
def test_in_memory_persistence():
    """Round-trip objects through an in-memory ``io.BytesIO`` buffer."""
    objs = [np.random.random((10, 10)), "some data", np.matrix([0, 1, 2])]
    for obj in objs:
        f = io.BytesIO()
        numpy_pickle.dump(obj, f)
        obj_reloaded = numpy_pickle.load(f)
        if isinstance(obj, np.ndarray):
            np.testing.assert_array_equal(obj_reloaded, obj)
        else:
            # ``==`` on arrays is element-wise, so a bare assert is only
            # valid for the non-array objects.
            assert obj_reloaded == obj
def test_numpy_persistence(tmpdir, compress):
    """Round-trip arrays, array subclasses and a composite object.

    ``tmpdir`` provides the directory holding the pickle file and
    ``compress`` is forwarded unchanged to ``numpy_pickle.dump``. Every case
    checks that a single file is produced and that the reloaded values are
    equal to the originals.
    """
    filename = tmpdir.join('test.pkl').strpath
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    # We use 'a.T' to have a non C-contiguous array.
    for obj in ((a, ), (a.T, ), (a, a), [a, a, a]):
        filenames = numpy_pickle.dump(obj, filename, compress=compress)

        # All is cached in one file
        assert len(filenames) == 1
        # Check that only one file was created
        assert filenames[0] == filename
        # Check that this file does exist
        assert os.path.exists(filenames[0])

        # Unpickle the object
        obj_ = numpy_pickle.load(filename)
        # Check that the items are indeed arrays
        for item in obj_:
            assert isinstance(item, np.ndarray)
        # And finally, check that all the values are equal.
        np.testing.assert_array_equal(np.array(obj), np.array(obj_))

    # Now test with array subclasses
    # ``float`` is what the removed ``np.float`` alias pointed to.
    for obj in (np.matrix(np.zeros(10)),
                np.memmap(filename + 'mmap',
                          mode='w+', shape=4, dtype=float)):
        filenames = numpy_pickle.dump(obj, filename, compress=compress)
        # All is cached in one file
        assert len(filenames) == 1

        obj_ = numpy_pickle.load(filename)
        if (type(obj) is not np.memmap and hasattr(obj, '__array_prepare__')):
            # We don't reconstruct memmaps
            assert isinstance(obj_, type(obj))

        np.testing.assert_array_equal(obj_, obj)

    # Test with an object containing multiple numpy arrays
    obj = ComplexTestObject()
    filenames = numpy_pickle.dump(obj, filename, compress=compress)
    # All is cached in one file
    assert len(filenames) == 1

    obj_loaded = numpy_pickle.load(filename)
    assert isinstance(obj_loaded, type(obj))
    np.testing.assert_array_equal(obj_loaded.array_float, obj.array_float)
    np.testing.assert_array_equal(obj_loaded.array_int, obj.array_int)
    np.testing.assert_array_equal(obj_loaded.array_obj, obj.array_obj)
def test_memory_usage():
    """Check the memory footprint of dump and load for several arrays.

    A small and a large array, plus their ``np.matrix`` counterparts, are
    dumped with and without compression. The memory reported by
    ``memory_used`` must stay below bounds derived from ``_IO_BUFFER_SIZE``.
    """
    base_name = env['filename']
    small = np.ones((10, 10))
    large = np.ones(shape=100 * int(1e6), dtype=np.uint8)
    candidates = (small, large, np.matrix(small), np.matrix(large))

    for compress in (True, False):
        for candidate in candidates:
            # A random suffix gives every object its own file.
            target = base_name + str(np.random.randint(0, 1000))

            # Dumping should not need more than the chunk-writing buffer.
            dump_cost = memory_used(numpy_pickle.dump,
                                    candidate, target, compress=compress)
            dump_limit = _IO_BUFFER_SIZE + 16 * 1024 ** 2 / 1e6
            assert dump_cost <= dump_limit

            # Loading may use the array itself plus the chunk-reading
            # buffer.
            load_cost = memory_used(numpy_pickle.load, target)
            load_limit = candidate.nbytes / 1e6 + (32 + _IO_BUFFER_SIZE)
            assert load_cost < load_limit
def test_numpy_persistence():
    """Round-trip arrays, array subclasses and a composite object.

    Every case is repeated for each value of ``compress`` and checks that
    the dump produces a single file and that the reloaded values are equal
    to the originals.
    """
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress in (False, True, 0, 3):
        # We use 'a.T' to have a non C-contiguous array.
        for obj in ((a, ), (a.T, ), (a, a), [a, a, a]):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))

            filenames = numpy_pickle.dump(obj,
                                          this_filename,
                                          compress=compress)

            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)
            # Check that only one file was created
            nose.tools.assert_equal(filenames[0], this_filename)
            # Check that this file does exist
            nose.tools.assert_true(
                os.path.exists(os.path.join(env['dir'], filenames[0])))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            np.testing.assert_array_equal(np.array(obj), np.array(obj_))

        # Now test with array subclasses
        # ``float`` is what the removed ``np.float`` alias pointed to.
        for obj in (np.matrix(np.zeros(10)),
                    np.memmap(filename + str(random.randint(0, 1000)) + 'mmap',
                              mode='w+', shape=4, dtype=float)):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj,
                                          this_filename,
                                          compress=compress)
            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)

            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap
                    and hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

            np.testing.assert_array_equal(obj_, obj)

        # Test with an object containing multiple numpy arrays
        # (this reuses the last file name picked in the loop above).
        obj = ComplexTestObject()
        filenames = numpy_pickle.dump(obj, this_filename, compress=compress)
        # All is cached in one file
        nose.tools.assert_equal(len(filenames), 1)

        obj_loaded = numpy_pickle.load(this_filename)
        nose.tools.assert_true(isinstance(obj_loaded, type(obj)))
        np.testing.assert_array_equal(obj_loaded.array_float, obj.array_float)
        np.testing.assert_array_equal(obj_loaded.array_int, obj.array_int)
        np.testing.assert_array_equal(obj_loaded.array_obj, obj.array_obj)