示例#1
0
def test_hdf5(tmpdir):
    path = str(tmpdir.join('test.hdf5'))
    s = ['aap', 'noot']
    df = vaex.from_arrays(x=[1, 2], y=[3, 4], s=s)
    df.export(path)
    fake_path = 's3://vaex/test.hdf5?profile_name=foo'
    length = os.stat(path).st_size
    with open(path, 'rb') as fp:
        fp.seek(0, 2)
        cache = vaex.file.cache.CachedFile(vaex.file.dup(fp),
                                           fake_path,
                                           str(tmpdir),
                                           block_size=2)
        ds = vaex.hdf5.dataset.Hdf5MemoryMapped(cache)
        df = vaex.dataframe.DataFrameLocal(ds)
        assert df.x.tolist() == [1, 2]
        assert df.y.tolist() == [3, 4]
        assert df.s.tolist() == s
        assert df.sum('x') == 3
        cache = vaex.file.dup(cache)
        cache.seek(0)
        data_cache = cache.read()
        # del df
    with open(path, 'rb') as f:
        length2 = f.seek(0, 2)
        length2 = f.tell()
        assert length == length2
        f.seek(0, 0)
        data = f.read()
        assert data_cache == data
示例#2
0
def test_cache(tmpdir):
    path = str(tmpdir.join('test.txt'))
    data = b'1234567890'
    with open(path, 'wb') as f:
        f.write(data)
    fake_path = 's3://vaex/test.hdf5?profile_name=foo'
    with open(path, 'rb') as fp:
        cache = vaex.file.cache.CachedFile(fp,
                                           fake_path,
                                           str(tmpdir),
                                           block_size=2)
        assert cache.tell() == 0
        assert cache.read(1) == b'1'
        assert cache.block_reads == 1
        assert cache.reads == 1
        assert cache.data_file.data[0] == ord('1')
        assert cache.data_file.data[1] == ord('2')
        assert cache.data_file.data[2] == 0

        assert cache.mask_file.data[0] == 1
        assert cache.mask_file.data[1] == 0
        assert cache.mask_file.data[2] == 0

        assert cache.tell() == 1
        assert cache.read(1) == b'2'
        assert cache.block_reads == 1
        assert cache.reads == 1
        assert cache.data_file.data[0] == ord('1')
        assert cache.data_file.data[1] == ord('2')
        assert cache.data_file.data[2] == 0

        assert cache.mask_file.data[0] == 1
        assert cache.mask_file.data[1] == 0
        assert cache.mask_file.data[2] == 0

        assert cache.tell() == 2
        cache.seek(4)
        assert cache.read(1) == b'5'
        assert cache.block_reads == 2
        assert cache.reads == 2
        assert cache.data_file.data[0] == ord('1')
        assert cache.data_file.data[1] == ord('2')
        assert cache.data_file.data[2] == 0
        assert cache.data_file.data[3] == 0
        assert cache.data_file.data[4] == ord('5')
        assert cache.data_file.data[5] == ord('6')
        assert cache.data_file.data[6] == 0

        assert cache.mask_file.data[0] == 1
        assert cache.mask_file.data[1] == 0
        assert cache.mask_file.data[2] == 1
        assert cache.mask_file.data[3] == 0

        assert cache.tell() == 5
        cache.seek(8)
        assert cache.read(2) == b'90'
        assert cache.block_reads == 3
        assert cache.reads == 3
        assert cache.data_file.data[0] == ord('1')
        assert cache.data_file.data[1] == ord('2')
        assert cache.data_file.data[2] == 0
        assert cache.data_file.data[3] == 0
        assert cache.data_file.data[4] == ord('5')
        assert cache.data_file.data[5] == ord('6')
        assert cache.data_file.data[6] == 0
        assert cache.data_file.data[7] == 0
        assert cache.data_file.data[8] == ord('9')
        assert cache.data_file.data[9] == ord('0')

        assert cache.mask_file.data[0] == 1
        assert cache.mask_file.data[1] == 0
        assert cache.mask_file.data[2] == 1
        assert cache.mask_file.data[3] == 0
        assert cache.mask_file.data[4] == 1

        cache.seek(1)
        assert cache.read(8) == b'23456789'
        assert cache.block_reads == 5
        assert cache.reads == 5
        assert cache.data_file.data[0] == ord('1')
        assert cache.data_file.data[1] == ord('2')
        assert cache.data_file.data[2] == ord('3')
        assert cache.data_file.data[3] == ord('4')
        assert cache.data_file.data[4] == ord('5')
        assert cache.data_file.data[5] == ord('6')
        assert cache.data_file.data[6] == ord('7')
        assert cache.data_file.data[7] == ord('8')
        assert cache.data_file.data[8] == ord('9')
        assert cache.data_file.data[9] == ord('0')

        assert cache.mask_file.data[0] == 1
        assert cache.mask_file.data[1] == 1
        assert cache.mask_file.data[2] == 1
        assert cache.mask_file.data[3] == 1
        assert cache.mask_file.data[4] == 1

        # reset
        cache.data_file.data[:] = ord('x')
        cache.mask_file.data[:] = 0
        cache.block_reads = 0
        cache.reads = 0
        cache.seek(3)
        assert cache.read(2) == b'45'
        assert cache.mask_file.data.tolist() == [0, 1, 1, 0, 0]
        assert cache.block_reads == 2
        assert cache.reads == 1
        cache.seek(0)
        # this should do two reads, once of the first block, and then the last two at once
        assert cache.read(10) == b'1234567890'
        assert cache.block_reads == 5
        assert cache.reads == 3
        assert cache.mask_file.data.tolist() == [1, 1, 1, 1, 1]
示例#3
0
def test_cache(tmpdir):
    path = str(tmpdir.join('test.txt'))
    data = b'1234567890'
    with open(path, 'wb') as f:
        f.write(data)
    fake_path = 's3://vaex/test.hdf5?profile_name=foo'
    with open(path, 'rb') as fp:
        cache = vaex.file.cache.CachedFile(fp,
                                           fake_path,
                                           str(tmpdir),
                                           block_size=2)
        assert cache.tell() == 0
        assert cache.read(1) == b'1'
        assert cache.block_reads == 1
        assert cache.reads == 1
        assert cache.data_file.data[0] == ord('1')
        assert cache.data_file.data[1] == ord('2')
        assert cache.data_file.data[2] == 0

        assert cache.mask_file.data[0] == 1
        assert cache.mask_file.data[1] == 0
        assert cache.mask_file.data[2] == 0

        assert cache.tell() == 1
        assert cache.read(1) == b'2'
        assert cache.block_reads == 1
        assert cache.reads == 1
        assert cache.data_file.data[0] == ord('1')
        assert cache.data_file.data[1] == ord('2')
        assert cache.data_file.data[2] == 0

        assert cache.mask_file.data[0] == 1
        assert cache.mask_file.data[1] == 0
        assert cache.mask_file.data[2] == 0

        assert cache.tell() == 2
        cache.seek(4)
        assert cache.read(1) == b'5'
        assert cache.block_reads == 2
        assert cache.reads == 2
        assert cache.data_file.data[0] == ord('1')
        assert cache.data_file.data[1] == ord('2')
        assert cache.data_file.data[2] == 0
        assert cache.data_file.data[3] == 0
        assert cache.data_file.data[4] == ord('5')
        assert cache.data_file.data[5] == ord('6')
        assert cache.data_file.data[6] == 0

        assert cache.mask_file.data[0] == 1
        assert cache.mask_file.data[1] == 0
        assert cache.mask_file.data[2] == 1
        assert cache.mask_file.data[3] == 0

        assert cache.tell() == 5
        cache.seek(8)
        assert cache.read(2) == b'90'
        assert cache.block_reads == 3
        assert cache.reads == 3
        assert cache.data_file.data[0] == ord('1')
        assert cache.data_file.data[1] == ord('2')
        assert cache.data_file.data[2] == 0
        assert cache.data_file.data[3] == 0
        assert cache.data_file.data[4] == ord('5')
        assert cache.data_file.data[5] == ord('6')
        assert cache.data_file.data[6] == 0
        assert cache.data_file.data[7] == 0
        assert cache.data_file.data[8] == ord('9')
        assert cache.data_file.data[9] == ord('0')

        assert cache.mask_file.data[0] == 1
        assert cache.mask_file.data[1] == 0
        assert cache.mask_file.data[2] == 1
        assert cache.mask_file.data[3] == 0
        assert cache.mask_file.data[4] == 1

        cache.seek(1)
        assert cache.read(8) == b'23456789'
        assert cache.block_reads == 5
        assert cache.reads == 5
        assert cache.data_file.data[0] == ord('1')
        assert cache.data_file.data[1] == ord('2')
        assert cache.data_file.data[2] == ord('3')
        assert cache.data_file.data[3] == ord('4')
        assert cache.data_file.data[4] == ord('5')
        assert cache.data_file.data[5] == ord('6')
        assert cache.data_file.data[6] == ord('7')
        assert cache.data_file.data[7] == ord('8')
        assert cache.data_file.data[8] == ord('9')
        assert cache.data_file.data[9] == ord('0')

        assert cache.mask_file.data[0] == 1
        assert cache.mask_file.data[1] == 1
        assert cache.mask_file.data[2] == 1
        assert cache.mask_file.data[3] == 1
        assert cache.mask_file.data[4] == 1