Example #1
def test_disk_used(tmpdir):
    cachedir = tmpdir.strpath
    # Now write a file that is 1M big in this directory, and check the
    # size. The reason we use such a big file is that it makes us robust
    # to errors due to block allocation.
    a = array.array('i')
    sizeof_i = a.itemsize
    target_size = 1024
    n = int(target_size * 1024 / sizeof_i)
    a = array.array('i', n * (1,))
    with open(os.path.join(cachedir, 'test'), 'wb') as output:
        a.tofile(output)
    assert disk_used(cachedir) >= target_size
    assert disk_used(cachedir) < target_size + 12
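
The examples above rely on a disk_used helper that is not shown. Below is a minimal sketch of such a helper, assuming it walks the directory and reports the allocated size in kilobytes; the block-size fallback is an assumption for illustration, not necessarily joblib's exact implementation.

import os


def disk_used(path):
    """Sketch: approximate disk usage of the files in `path`, in kilobytes."""
    size = 0
    for name in os.listdir(path):
        stat = os.stat(os.path.join(path, name))
        if hasattr(stat, 'st_blocks'):
            # st_blocks counts 512-byte blocks actually allocated on disk.
            size += stat.st_blocks * 512
        else:
            # st_blocks is unavailable on some platforms (e.g. Windows);
            # round the apparent size up to the next 512-byte block instead.
            size += (stat.st_size // 512 + 1) * 512
    return int(size / 1024.)

With such a helper, the 1 MB file written by the test reports roughly 1024 kilobytes, which is exactly the range the two assertions bound.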
Example #2
def test_disk_used(tmpdir):
    cachedir = tmpdir.strpath
    # Now write a file that is 1M big in this directory, and check the
    # size. The reason we use such a big file is that it makes us robust
    # to errors due to block allocation.
    a = array.array('i')
    sizeof_i = a.itemsize
    target_size = 1024
    n = int(target_size * 1024 / sizeof_i)
    a = array.array('i', n * (1,))
    with open(os.path.join(cachedir, 'test'), 'wb') as output:
        a.tofile(output)
    assert disk_used(cachedir) >= target_size
    assert disk_used(cachedir) < target_size + 12
Example #3
def bench_compress(dataset, name='',
                   compress=('zlib', 0), cache_size=0, tries=5):
    """Bench joblib dump and load functions, compress modes."""
    # Generate the compression strategy string before the joblib compatibility
    # check below, as that check may replace `compress` with a non-tuple type.
    compress_str = "Raw" if compress[1] == 0 else "{0} {1}".format(*compress)

    # joblib versions prior to 0.10 do not accept a tuple as the compress
    # argument, so for those versions only the compression level (the second
    # element of the tuple) is used and the compression strategy is ignored.
    if (isinstance(compress, tuple) and
            tuple(map(int, joblib.__version__.split('.')[:2])) < (0, 10)):
        compress = compress[1]

    time_write = time_read = du = mem_read = mem_write = []
    clear_out()
    time_write, obj = timeit(joblib.dump, dataset, 'out/test.pkl',
                             tries=tries,
                             compress=compress, cache_size=cache_size)
    del obj
    gc.collect()
    mem_write = memory_used(joblib.dump, dataset, 'out/test.pkl',
                            compress=compress, cache_size=cache_size)
    delete_obj(dataset)
    du = disk_used('out') / 1024.
    time_read, obj = timeit(joblib.load, 'out/test.pkl', tries=tries)
    delete_obj(obj)
    mem_read = memory_used(joblib.load, 'out/test.pkl')
    print_line(name, compress_str, time_write, time_read,
               mem_write, mem_read, du)
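
A hypothetical driver for bench_compress could look as follows. The NumPy array, the compressor list, and the number of tries are illustrative only; a fresh dataset is generated per run because bench_compress deletes its input via delete_obj.

import numpy as np

# Benchmark the same kind of array uncompressed and at a few zlib levels.
# The ('zlib', 0) entry is reported as "Raw" by bench_compress.
for strategy, level in [('zlib', 0), ('zlib', 3), ('zlib', 9)]:
    dataset = np.random.random_sample((10000, 1000))  # ~80 MB of float64
    bench_compress(dataset, name='random array',
                   compress=(strategy, level), tries=3)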
Example #4
def bench_mmap(dataset, name='', cache_size=0, mmap_mode='r', tries=5):
    """Bench joblib dump and load functions, memmap modes."""
    time_write = time_read = du = []
    clear_out()
    time_write, _ = timeit(joblib.dump,
                           dataset,
                           'out/test.pkl',
                           tries=tries,
                           cache_size=cache_size)
    mem_write = memory_used(joblib.dump,
                            dataset,
                            'out/test.pkl',
                            cache_size=cache_size)

    delete_obj(dataset)

    time_read, obj = timeit(joblib.load,
                            'out/test.pkl',
                            tries=tries,
                            mmap_mode=mmap_mode)
    delete_obj(obj)
    mem_read = memory_used(joblib.load, 'out/test.pkl', mmap_mode=mmap_mode)
    du = disk_used('out') / 1024.
    print_line(name, 'mmap %s' % mmap_mode, time_write, time_read, mem_write,
               mem_read, du)
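
Both benchmark functions depend on timeit, memory_used, clear_out, delete_obj, and print_line, none of which appear in the examples. Below is a rough sketch of the two measurement helpers, assuming timeit returns the best wall-clock time over several runs together with the function's result, and memory_used samples memory through the memory_profiler package (an assumed extra dependency).

import gc
import time

from memory_profiler import memory_usage  # assumed extra dependency


def timeit(func, *args, tries=5, **kwargs):
    """Sketch: best wall-clock time of `tries` runs, plus the last result."""
    best = float('inf')
    result = None
    for _ in range(tries):
        gc.collect()
        start = time.time()
        result = func(*args, **kwargs)
        best = min(best, time.time() - start)
    return best, result


def memory_used(func, *args, **kwargs):
    """Sketch: extra memory (in MiB) consumed while `func` runs."""
    gc.collect()
    samples = memory_usage((func, args, kwargs), interval=0.1)
    return max(samples) - min(samples)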
Example #5
def test_disk_used():
    cachedir = mkdtemp()
    try:
        if os.path.exists(cachedir):
            shutil.rmtree(cachedir)
        os.mkdir(cachedir)
        # Now write a file that is 1M big in this directory, and check the
        # size. The reason we use such a big file is that it makes us robust
        # to errors due to block allocation.
        a = array.array('i')
        sizeof_i = a.itemsize
        target_size = 1024
        n = int(target_size * 1024 / sizeof_i)
        a = array.array('i', n * (1,))
        with open(os.path.join(cachedir, 'test'), 'wb') as output:
            a.tofile(output)
        nose.tools.assert_true(disk_used(cachedir) >= target_size)
        nose.tools.assert_true(disk_used(cachedir) < target_size + 12)
    finally:
        shutil.rmtree(cachedir)
Example #6
def test_disk_used():
    cachedir = mkdtemp()
    try:
        if os.path.exists(cachedir):
            shutil.rmtree(cachedir)
        os.mkdir(cachedir)
        # Now write a file that is 1M big in this directory, and check the
        # size. The reason we use such a big file is that it makes us robust
        # to errors due to block allocation.
        a = array.array('i')
        sizeof_i = a.itemsize
        target_size = 1024
        n = int(target_size * 1024 / sizeof_i)
        a = array.array('i', n * (1,))
        with open(os.path.join(cachedir, 'test'), 'wb') as output:
            a.tofile(output)
        nose.tools.assert_true(disk_used(cachedir) >= target_size)
        nose.tools.assert_true(disk_used(cachedir) < target_size + 12)
    finally:
        shutil.rmtree(cachedir)
Example #7
def bench_mmap(dataset, name='', cache_size=0, mmap_mode='r', tries=5):
    """Bench joblib dump and load functions, memmap modes."""
    time_write = time_read = du = []
    clear_out()
    time_write, _ = timeit(joblib.dump, dataset, 'out/test.pkl',
                           tries=tries,
                           cache_size=cache_size)
    mem_write = memory_used(joblib.dump, dataset, 'out/test.pkl',
                            cache_size=cache_size)

    delete_obj(dataset)

    time_read, obj = timeit(joblib.load, 'out/test.pkl',
                            tries=tries,
                            mmap_mode=mmap_mode)
    delete_obj(obj)
    mem_read = memory_used(joblib.load, 'out/test.pkl', mmap_mode=mmap_mode)
    du = disk_used('out') / 1024.
    print_line(name, 'mmap %s' % mmap_mode,
               time_write, time_read, mem_write, mem_read, du)
Example #8
def bench_compress(dataset,
                   name='',
                   compress=('zlib', 0),
                   cache_size=0,
                   tries=5):
    """Bench joblib dump and load functions, compress modes."""
    # Generate the compression strategy string before the joblib compatibility
    # check below, as that check may replace `compress` with a non-tuple type.
    compress_str = "Raw" if compress[1] == 0 else "{0} {1}".format(*compress)

    # joblib versions prior to 0.10 do not accept a tuple as the compress
    # argument, so for those versions only the compression level (the second
    # element of the tuple) is used and the compression strategy is ignored.
    if (isinstance(compress, tuple)
            and tuple(map(int,
                          joblib.__version__.split('.')[:2])) < (0, 10)):
        compress = compress[1]

    time_write = time_read = du = mem_read = mem_write = []
    clear_out()
    time_write, obj = timeit(joblib.dump,
                             dataset,
                             'out/test.pkl',
                             tries=tries,
                             compress=compress,
                             cache_size=cache_size)
    del obj
    gc.collect()
    mem_write = memory_used(joblib.dump,
                            dataset,
                            'out/test.pkl',
                            compress=compress,
                            cache_size=cache_size)
    delete_obj(dataset)
    du = disk_used('out') / 1024.
    time_read, obj = timeit(joblib.load, 'out/test.pkl', tries=tries)
    delete_obj(obj)
    mem_read = memory_used(joblib.load, 'out/test.pkl')
    print_line(name, compress_str, time_write, time_read, mem_write, mem_read,
               du)
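
The remaining helpers used above are also not shown. Below is a plausible sketch, with names, units, and output formatting treated as assumptions about the surrounding benchmark script.

import gc
import os
import shutil


def clear_out():
    """Sketch: reset the 'out/' directory that the benchmarks write into."""
    if os.path.exists('out'):
        shutil.rmtree('out')
    os.mkdir('out')


def delete_obj(obj):
    """Sketch: drop this reference to a large object and force a GC pass."""
    del obj
    gc.collect()


def print_line(name, strategy, time_write, time_read, mem_write, mem_read, du):
    """Sketch: print one result row (times in s, memory in MiB, disk in MB)."""
    print('{0:<20} {1:<12} {2:>8.3f}s {3:>8.3f}s {4:>9.1f} {5:>9.1f} {6:>9.1f}'
          .format(name, strategy, time_write, time_read,
                  mem_write, mem_read, du))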