Example #1
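Round-trip test for ZarrDataStore: it writes self.n_arrays arrays under a single key into a directory-backed store with LZMA compression, then reopens the store and asserts that get_all returns the original arrays.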
    def test_zarrdatastore_list(self):
        ## WRITE
        assert not os.path.exists(os.path.join(self.data_dir, 'test.mdb'))
        zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                       datastore_type=DatastoreType.DIRECTORY,
                                       compression_type=CompressionType.LZMA)
        recorder = Recorder(zarr_datastore)

        for i in range(self.n_arrays):
            array = self.arrays[i]
            recorder.record(self.key, array)
        recorder.close()
        ## END WRITE

        ## READ
        zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                       datastore_type=DatastoreType.DIRECTORY,
                                       compression_type=CompressionType.LZMA)
        recorder = Recorder(zarr_datastore)

        l = recorder.get_all(self.key)
        l = np.array(l)
        self.assertTrue((self.arrays == l).all())

        recorder.close()
Example #2
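End-to-end benchmark for the Redis backend: it starts a local RedisServer, times each record() call and the total write, reports the on-disk size, then restarts the server and times the read-back.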
def main():
    read_only = False

    data_dir = os.path.expanduser('~/output/tmp/redis-test')
    os.makedirs(data_dir, exist_ok=True)
    key = 'train.what'
    bs = 10
    sts = 500 * 20
    ns = 200
    n_arrays = 100
    # serialization = Serialization.PYARROW
    serialization = Serialization.PICKLE

    if not read_only:
        arrays = np.random.rand(n_arrays, bs, sts, ns)

        if os.path.exists(data_dir):
            print("Removing existing dir")
            shutil.rmtree(data_dir)
        os.makedirs(data_dir, exist_ok=True)

        with RedisServer(data_directory=data_dir, serialization=serialization):
            ## WRITE
            redis_datastore = RedisDataStore(server_host='localhost')
            recorder = Recorder(redis_datastore)

            with Timer() as wt:
                write_times = []
                for i in range(n_arrays):
                    array = arrays[i]
                    with Timer() as st:
                        recorder.record(key, array)
                    print("%d: Storing took %.2fs" % (i, st.difftime))
                    write_times.append(st.difftime)
                print("Mean write time was %.4fs (+/- %.4f)" % (np.mean(write_times), np.std(write_times)))
                recorder.close()
            print("Total write time was %.2fs" % wt.difftime)
            ## END WRITE

        print("Dir size after write is %d MiB" % (int(get_size(data_dir)) / 1024 / 1024))

    ## READ
    # Reads must run while the server is still up, so they stay inside the context manager.
    with RedisServer(data_directory=data_dir, serialization=serialization):
        redis_datastore = RedisDataStore(server_host='localhost', data_directory=data_dir, serialization=serialization)
        recorder = Recorder(redis_datastore)

        with Timer() as rt:
            l = recorder.get_all(key)
        print("Reading took %.2fs" % rt.difftime)

        with Timer() as rrt:
            l = np.array(l)
        print("Into array took %.2fs" % rrt.difftime)

        print("Mean is", np.mean(l), l.shape)

        recorder.close()
Example #3
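The same round-trip against InMemoryDataStore. Because the data lives only in process memory, the read phase reuses the existing datastore instance instead of constructing a new one.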
    def test_inmemorydatastore_list(self):
        ## WRITE
        inmem_datastore = InMemoryDataStore()
        recorder = Recorder(inmem_datastore)

        for i in range(self.n_arrays):
            array = self.arrays[i]
            recorder.record(self.key, array)
        recorder.close()
        ## END WRITE

        ## READ
        # Reuse the existing datastore: constructing a new InMemoryDataStore would discard all recorded data.
        recorder = Recorder(inmem_datastore)

        l = recorder.get_all(self.key)
        l = np.array(l)
        self.assertTrue((self.arrays == l).all())

        recorder.close()
Example #4
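Redis round-trip test. Write and read run in separate RedisServer contexts over the same self.data_dir, which appears to rely on Redis persisting its dump across the server restart.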
    def test_redisdatastore_list(self):
        with RedisServer(data_directory=self.data_dir):
            ## WRITE
            redis_datastore = RedisDataStore(server_host='localhost')
            recorder = Recorder(redis_datastore)

            for i in range(self.n_arrays):
                array = self.arrays[i]
                recorder.record(self.key, array)
            recorder.close()
            ## END WRITE

        with RedisServer(data_directory=self.data_dir):
            ## READ
            redis_datastore = RedisDataStore(server_host='localhost')
            recorder = Recorder(redis_datastore)

            l = recorder.get_all(self.key)
            l = np.array(l)
            self.assertTrue((self.arrays == l).all())

            recorder.close()
Example #5
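HDF5 round-trip via HDF5DataStore. The commented-out enable_swmr() call points at HDF5's single-writer/multiple-reader mode, which would let readers attach while the file is still being written.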
    def test_hdf5datastore_list(self):
        ## WRITE
        self.file_pth = os.path.join(self.data_dir, 'data.h5')
        hdf5_datastore = HDF5DataStore(self.file_pth)
        recorder = Recorder(hdf5_datastore)

        for i in range(self.n_arrays):
            array = self.arrays[i]
            recorder.record(self.key, array)
            # if i == 0:
            #     hdf5_datastore.enable_swmr()
        recorder.close()
        ## END WRITE

        ## READ
        hdf5_datastore = HDF5DataStore(self.file_pth)
        recorder = Recorder(hdf5_datastore)

        l = recorder.get_all(self.key)
        l = np.array(l)
        self.assertTrue((self.arrays == l).all())

        recorder.close()
Example #6
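Object-record test for ZarrDataStore: each record() call stores a ragged list of differently shaped arrays (including an empty one), and the read-back is compared element by element.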
    def test_zarrdatastore_object(self):
        ## WRITE
        assert not os.path.exists(os.path.join(self.data_dir, 'test.mdb'))
        zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                       datastore_type=DatastoreType.DIRECTORY,
                                       compression_type=CompressionType.LZMA)
        recorder = Recorder(zarr_datastore)

        test_list_1 = [
            np.random.rand(3, 1),
            np.random.rand(4, 1),
            np.array([])
        ]
        test_list_2 = [
            np.random.rand(3, 1),
            np.random.rand(4, 1),
            np.array([])
        ]
        test_list = [test_list_1, test_list_2]

        recorder.record(self.key, test_list_1)
        recorder.record(self.key, test_list_2)
        recorder.close()
        ## END WRITE

        ## READ
        zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                       datastore_type=DatastoreType.DIRECTORY,
                                       compression_type=CompressionType.LZMA)
        recorder = Recorder(zarr_datastore)

        l = recorder.get_all(self.key)
        for j in range(len(l)):
            for i in range(len(test_list[j])):
                self.assertTrue((test_list[j][i] == l[j][i]).all())

        recorder.close()
Example #7
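Scalar-record test for ZarrDataStore: two floats recorded under the same key should come back as the sequence [10., 20.].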
    def test_zarrdatastore_list_scalar(self):
        ## WRITE
        assert not os.path.exists(os.path.join(self.data_dir, 'test.mdb'))
        zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                       datastore_type=DatastoreType.DIRECTORY,
                                       compression_type=CompressionType.LZMA)
        recorder = Recorder(zarr_datastore)

        recorder.record(self.key, 10.)
        recorder.record(self.key, 20.)
        recorder.close()
        ## END WRITE

        ## READ
        zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                       datastore_type=DatastoreType.DIRECTORY,
                                       compression_type=CompressionType.LZMA)
        recorder = Recorder(zarr_datastore)

        l = recorder.get_all(self.key)
        l = np.array(l)
        self.assertTrue((np.array([10., 20.]) == l).all())

        recorder.close()
Example #8
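Zarr counterpart of the Redis benchmark in Example #2: it writes n_arrays arrays of shape (bs, sts, ns) with a configurable chunk size, then times the full read, repeated random sub-array reads (where chunked storage should shine), and the final conversion to a dense array.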
def main():
    read_only = False
    data_dir = os.path.expanduser('~/output/tmp/zarr-test')
    file_pth = os.path.join(data_dir, 'data.mdb')
    key = 'train/what'
    bs = 10
    sts = 500 * 20
    ns = 200
    n_arrays = 100
    chunk_size_mb = 0.1

    if not read_only:
        arrays = np.random.rand(n_arrays, bs, sts, ns)

        os.makedirs(data_dir, exist_ok=True)
        if os.path.exists(file_pth):
            print("Removing existing dir")
            shutil.rmtree(file_pth)

        ## WRITE
        zarr_datastore = ZarrDataStore(file_pth,
                                       desired_chunk_size_bytes=chunk_size_mb *
                                       1024**2)
        recorder = Recorder(zarr_datastore)

        with Timer() as wt:
            write_times = []
            for i in range(n_arrays):
                array = arrays[i]
                with Timer() as st:
                    recorder.record(key, array)
                print("%d: Storing took %.2fs" % (i, st.difftime))
                write_times.append(st.difftime)
            print("Mean write time was %.4fs (+/- %.4f)" %
                  (np.mean(write_times), np.std(write_times)))
            recorder.close()
        print("Total write time was %.2fs" % wt.difftime)
        ## END WRITE

        print("Dir size after write is %d MiB" %
              (int(get_size(file_pth)) / 1024 / 1024))

    ## READ
    zarr_datastore = ZarrDataStore(file_pth,
                                   desired_chunk_size_bytes=chunk_size_mb *
                                   1024**2)
    recorder = Recorder(zarr_datastore)

    with Timer() as rt:
        l = recorder.get_all(key)
    print("Reading took %.2fs" % rt.difftime)

    read_times = []
    for i in range(20):
        b = np.random.randint(bs)
        st = np.random.randint(sts)
        with Timer() as rrt:
            ll = np.array(l[:(n_arrays // 2), b, st, :])
        print("Into sub-array took %.4fs" % rrt.difftime)
        read_times.append(rrt.difftime)
    print("Into sub-array mean readtime was %.4fs (+/- %.4f)" %
          (np.mean(read_times), np.std(read_times)))

    with Timer() as rrt:
        l = np.array(l)
    print("Into array (Total read time) was %.2fs" % rrt.difftime)

    print("Data mean is", np.mean(l), l.shape)

    recorder.close()
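
All eight examples lean on the same three-call protocol: wrap a datastore in a Recorder, append values with record(key, value), and read them back as a list with get_all(key). The snippets never show the library's imports or internals, so what follows is a minimal self-contained sketch of that protocol using stand-in classes; only the method names and the key-to-list-of-values semantics are taken from the code above.

import numpy as np


class InMemoryDataStoreSketch:
    """Illustrative stand-in, not the library's implementation:
    keeps a Python list of recorded values per key."""

    def __init__(self):
        self._data = {}

    def append(self, key, value):
        self._data.setdefault(key, []).append(value)

    def get_all(self, key):
        return self._data.get(key, [])

    def close(self):
        pass  # nothing to release for an in-memory store


class RecorderSketch:
    """Illustrative stand-in: a thin facade over a datastore,
    matching the record/get_all/close calls in the examples."""

    def __init__(self, datastore):
        self._datastore = datastore

    def record(self, key, value):
        self._datastore.append(key, value)

    def get_all(self, key):
        return self._datastore.get_all(key)

    def close(self):
        self._datastore.close()


# Usage mirroring the WRITE/READ structure of the tests above.
recorder = RecorderSketch(InMemoryDataStoreSketch())
for _ in range(3):
    recorder.record('train/loss', np.random.rand(4, 2))
values = np.array(recorder.get_all('train/loss'))
assert values.shape == (3, 4, 2)
recorder.close()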