def test_objectview():
    """Exercise object views over nested ``Sequence``/``SchemaDict`` schemas.

    Values are written and read back through a dataset slice, a tensor view
    and the dataset itself; the ``IndexError``/``KeyError`` cases asserted
    below are checked, and two lookups are repeated with ``lazy`` disabled.
    """

    def fives(*shape):
        # Fresh array of the given shape filled with 5.0 on every call.
        return 5 * np.ones(shape)

    nested_schema = SchemaDict({
        "a": Tensor((20, 20), dtype=int, max_shape=(20, 20)),
        "b": Sequence(dtype=BBox(dtype=float)),
        "c": Sequence(dtype=SchemaDict(
            {"d": Sequence((), dtype=Tensor((5, 5), dtype=float))})),
        "e": Sequence(dtype={
            "f": {
                "g": Tensor(5, dtype=int),
                "h": Tensor((), dtype=int),
            }
        }),
    })
    ds = hub.Dataset("./nested_seq", shape=(5, ), mode="w", schema=nested_schema)

    # Dataset view -> object view.
    tail_view = ds[3:5]
    tail_view["c", 0] = {"d": fives(2, 2, 5, 5)}
    first_cell = tail_view[0, "c", 0, "d", 0].compute()
    assert (first_cell == fives(5, 5)).all()

    # Neither the dataset view nor the dataset can be computed unsqueezed.
    with pytest.raises(IndexError):
        tail_view["c", "d"].compute()
    with pytest.raises(IndexError):
        ds["c", "d"].compute()

    # Tensor view -> object view (sequence of tensors).
    ds["b", 0] = 0.5 * np.ones((5, 4))
    bbox_view = ds["b", 0]
    bbox_view[0] = 0.3 * np.ones((4, ))
    assert (bbox_view[0].compute() == 0.3 * np.ones((4, ))).all()

    # Dataset -> object view.
    assert (ds[3, "c", "d"].compute() == fives(2, 2, 5, 5)).all()

    # Sequence of schema dicts, addressed through the dataset.
    ds[0, "e"] = {"f": {"g": np.ones((3, 5)), "h": np.array([42, 25, 15])}}
    with pytest.raises(KeyError):
        ds[0, "e", 1].compute()
    assert (ds[0, "e", "f", "h"].compute() == np.array([42, 25, 15])).all()

    # Same structure, addressed through the dataset view.
    tail_view[0, "e"] = {"f": {"g": np.ones((3, 5)), "h": np.array([1, 25, 1])}}
    assert (tail_view[0, "e", "f", "h"].compute() == np.array([1, 25, 1])).all()

    # With lazy mode off, indexing is compared directly (no .compute()).
    ds.lazy = False
    assert ds[0, "e", 0, "f", "h"] == 42
    with pytest.raises(KeyError):
        # Only the lookup matters here; it is expected to raise.
        ds[0, "e", 1]["f", "h"] == 25
    ds.lazy = True

    # Object view built directly from the dataset.
    nested_view = ds["c", "d"]
    with pytest.raises(IndexError):
        nested_view.compute()
    sample_view = nested_view[3]
    assert (sample_view.compute() == fives(2, 2, 5, 5)).all()
    assert (nested_view[3][0, 0].compute() == fives(5, 5)).all()
    assert (nested_view[3][1].compute() == fives(2, 5, 5)).all()
from hub.exceptions import DirectoryNotEmptyException
from hub.schema import BBox, ClassLabel, Image, SchemaDict, Sequence, Tensor, Text
from hub.schema.class_label import ClassLabel
from hub.utils import (
    azure_creds_exist,
    gcp_creds_exist,
    hub_creds_exist,
    minio_creds_exist,
    s3_creds_exist,
    transformers_loaded,
)

# NOTE(review): `dataset` is not imported in the visible lines; presumably it
# is imported earlier in the file -- confirm.
Dataset = dataset.Dataset

# Module-level schema: one "image" tensor plus a nested "label" group, two of
# whose tensors name an explicit compressor ("lz4" and "zstd").
my_schema = {
    "image": Tensor((10, 1920, 1080, 3), "uint8"),
    "label": {
        "a": Tensor((100, 200), "int32", compressor="lz4"),
        "b": Tensor((100, 400), "int64", compressor="zstd"),
        "c": Tensor((5, 3), "uint8"),
        "d": {"e": Tensor((5, 3), "uint8")},
    },
}


def test_dataset_2():
    """Create a two-sample dataset with two float fields and write to it.

    Sets a description in ``meta_information`` and assigns a value to the
    first element of ``"first"``.
    """
    dt = {"first": "float", "second": "float"}
    ds = Dataset(schema=dt, shape=(2,), url="./data/test/test_dataset2", mode="w")
    ds.meta_information["description"] = "This is my description"
    ds["first"][0] = 2.3
import numpy as np
import zarr
import hub
from hub.schema import Tensor, Image, Text
from hub.utils import Timer

# Fixed-shape schema: shape and the third positional argument are identical
# for "image".
my_schema = {
    "image": Tensor((28, 28, 4), "int32", (28, 28, 4)),
    "label": Text((None, ), "int64", (20, )),
    "confidence": "float",
}

# Dynamic-shape schema: every image dimension is None, bounded by max_shape.
dynamic_schema = {
    "image": Tensor(shape=(None, None, None), dtype="int32", max_shape=(32, 32, 3)),
    "label": Text((None, ), "int64", (20, )),
}


def test_pipeline_basic():
    """Create a 100-sample dataset using ``my_schema`` and fill every sample.

    Each sample gets an all-ones int32 image, a ``"hello {i}"`` label and a
    confidence of 0.2.
    """
    ds = hub.Dataset("./data/test/test_pipeline_basic", mode="w", shape=(100, ), schema=my_schema)
    # The index is needed for the keyed writes below, hence the range loop.
    for i in range(len(ds)):
        ds["image", i] = np.ones((28, 28, 4), dtype="int32")
        ds["label", i] = f"hello {i}"
        ds["confidence", i] = 0.2
def benchmark(sample_size=100, width=1000, channels=4, dtype="int8", *, sequential_write=False):
    """Time Hub transform pipelines, optionally preceded by raw sequential writes.

    Args:
        sample_size: Number of samples in every array/dataset.
        width: Height and width of each square sample.
        channels: Size of the last dimension of each sample.
        dtype: Element dtype used for all arrays and the schema.
        sequential_write: When true, also time element-by-element writes into
            the NumPy array, both Zarr arrays and both Hub datasets. Defaults
            to False, which matches the branch that was previously disabled
            with a hard-coded ``if False:``.
    """
    sample_shape = (width, width, channels)
    full_shape = (sample_size,) + sample_shape

    def random_sample():
        # Random values scaled to [0, 255) and cast to the benchmark dtype.
        return (np.random.rand(*sample_shape) * 255).astype(dtype)

    # These targets are created unconditionally, exactly as before, so the
    # on-disk stores exist whether or not the sequential benchmark runs.
    numpy_arr = np.zeros(full_shape, dtype=dtype)
    zarr_fs = zarr.zeros(
        full_shape,
        dtype=dtype,
        store=zarr.storage.FSStore("./data/test/array"),
        overwrite=True,
    )
    zarr_lmdb = zarr.zeros(
        full_shape,
        dtype=dtype,
        store=zarr.storage.LMDBStore("./data/test/array2"),
        overwrite=True,
    )

    my_schema = {
        "image": Tensor(sample_shape, dtype, sample_shape),
    }

    ds_fs = hub.Dataset(
        "./data/test/test_pipeline_basic_3",
        mode="w",
        shape=(sample_size, ),
        schema=my_schema,
        cache=0,
    )
    ds_fs_cache = hub.Dataset(
        "./data/test/test_pipeline_basic_2",
        mode="w",
        shape=(sample_size, ),
        schema=my_schema,
    )

    if sequential_write:
        print(
            f"~~~ Sequential write of {sample_size}x{width}x{width}x{channels} random arrays ~~~"
        )
        targets = [
            ("Numpy", numpy_arr),
            ("Zarr FS", zarr_fs),
            ("Zarr LMDB", zarr_lmdb),
            ("Hub FS", ds_fs["image"]),
            ("Hub FS+Cache", ds_fs_cache["image"]),
        ]
        for name, arr in targets:
            with Timer(name):
                for i in range(sample_size):
                    arr[i] = random_sample()

    print(
        f"~~~ Pipeline {sample_size}x{width}x{width}x{channels} random arrays ~~~"
    )
    # Other schedulers ("ray", "green", "dask") were listed but commented out.
    for name, processes in [("single", 1), ("processed", 10)]:

        @hub.transform(schema=my_schema, scheduler=name, processes=processes)
        def my_transform(sample):
            return {
                "image": random_sample(),
            }

        with Timer(name):
            out_ds = my_transform(ds_fs)
            out_ds.store(f"./data/test/test_pipeline_basic_output_{name}")
def test_tensor_repr():
    """Check the ``repr`` of a ``Tensor`` built with an explicit shape and dtype."""
    expected_text = "Tensor(shape=(5000,), dtype='<U20')"
    unicode_tensor = Tensor(shape=(5000,), dtype="<U20")
    assert repr(unicode_tensor) == expected_text
def test_tensor_error():
    """Check that ``Tensor(None, max_shape=None)`` is rejected with ``TypeError``.

    ``pytest.raises`` makes the test fail when no exception is raised; the
    previous ``try``/``except`` form passed silently in that case.
    """
    import pytest  # local import keeps this block self-contained

    with pytest.raises(TypeError) as excinfo:
        Tensor(None, max_shape=None)
    assert "both shape and max_shape cannot be None at the same time" in str(excinfo.value)
from hub import Dataset
from hub.api.datasetview import TensorView
from hub.exceptions import NoneValueException
from hub.schema import Tensor
import numpy as np
import pytest

my_schema = {
    "image": Tensor((None, None, None, None), "uint8", max_shape=(10, 1920, 1080, 4)),
    "label": float,
}

# Dataset shared by the tests below; it is created when the module is imported.
ds = Dataset("./data/test/dataset", shape=(100, ), mode="w", schema=my_schema)


def test_tensorview_init():
    """``TensorView`` must raise ``NoneValueException`` for a None subpath or dataset."""
    invalid_calls = [
        ((ds,), {"subpath": None}),
        ((), {"dataset": None, "subpath": "image"}),
    ]
    for positional, keywords in invalid_calls:
        with pytest.raises(NoneValueException):
            TensorView(*positional, **keywords)


def test_tensorview_getitem():
    """Indexing a tensor view with a string in the first position raises ``IndexError``."""
    image_view = ds["image"]
    bad_index = ("7", slice(0, 1920), slice(0, 1080), slice(0, 3))
    with pytest.raises(IndexError):
        image_view[bad_index].compute()
1024, # 2048, # 4096, # 8192, # 8192 * 2, # 8192 * 4, # 8192 * 8, ] download_time = [] upload_time = [] for cs in chunk_sizes: shape = (1, ) my_schema = { "img": Tensor(shape=(cs, cs), chunks=cs, dtype="uint8", compressor="default") } ds = hub.Dataset("test/benchmark:t{}".format(str(cs)), shape=shape, schema=my_schema) arr = (255 * np.random.rand(shape[0], cs, cs)).astype("uint8") # Upload t1 = time.time() ds["img"][:] = arr t2 = time.time() upload_time.append(t2 - t1) # Download t3 = time.time() ds["img"][:]
def test_tensor_error_2():
    """Check the exception type raised for a series of invalid ``Tensor`` arguments.

    Each case is ``(expected exception, positional args, keyword args)``;
    the cases run in order and the test stops at the first one that fails.
    """
    invalid_cases = [
        (TypeError, (), {"shape": 5.1}),
        (TypeError, (), {"shape": (5.1, )}),
        (TypeError, (), {"shape": (5, 6), "max_shape": (7.2, 8)}),
        (ValueError, (), {"shape": (5, 6), "max_shape": (7, 8, 9)}),
        (TypeError, (), {"shape": (5, None), "max_shape": (5, None)}),
        (TypeError, (), {"shape": (5, 6), "max_shape": (7.2, 8)}),
        (ValueError, (), {"max_shape": (10, 15)}),
        (TypeError, (None, ), {}),
        (ValueError, ((5, 6, None), ), {}),
        (TypeError, (), {"max_shape": "abc"}),
        (TypeError, (), {"max_shape": (7.4, 2)}),
        (ValueError, (), {"max_shape": []}),
    ]
    for expected_error, positional, keywords in invalid_cases:
        with pytest.raises(expected_error):
            Tensor(*positional, **keywords)
def test_dataset_change_schema():
    """Re-opening a dataset with a different schema raises ``SchemaMismatchException``.

    A dataset is created with a baseline schema, then opened at the same path
    with five variants that each differ from the baseline in one place.
    """
    url = "./data/test_schema_change"

    def build_schema(top_key="abc", top_dtype="uint8", nested_key="ghi", nested_shape=(100, 100)):
        # Baseline schema with one element optionally swapped out.
        return {
            top_key: top_dtype,
            "def": {
                nested_key: Tensor(nested_shape),
                "rst": Tensor((100, 100, 100)),
            },
        }

    # Keep a reference to the baseline dataset for the rest of the test.
    baseline_ds = Dataset(url, schema=build_schema(), shape=(100, ))

    mismatched_schemas = [
        build_schema(nested_shape=(200, 100)),
        build_schema(top_key="abrs"),
        build_schema(nested_key="ghijk"),
        build_schema(top_dtype="uint16"),
        build_schema(nested_shape=(100, 100, 3)),
    ]
    for candidate in mismatched_schemas:
        with pytest.raises(SchemaMismatchException):
            Dataset(url, schema=candidate, shape=(100, ))
def test_tensor_error():
    """Check that ``Tensor(None, max_shape=None)`` is rejected with ``TypeError``.

    ``pytest.raises`` makes the test fail when no exception is raised; the
    previous ``try``/``except`` form passed silently in that case.
    """
    import pytest  # local import keeps this block self-contained

    with pytest.raises(TypeError) as excinfo:
        Tensor(None, max_shape=None)
    assert "shape cannot be None" in str(excinfo.value)
""" License: This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. """ import numpy as np import hub from hub.schema import Tensor schema = { "image": Tensor((10, 1920, 1080, 3), "uint8"), "label": { "a": Tensor((100, 200), "int32"), "b": Tensor((100, 400), "int64"), }, } def test_hub_open(): ds = hub.Dataset("./data/test/hub_open", token=None, shape=(10000, ), mode="w", schema=schema) ds["label/a", 5, 50, 50] = 9 assert ds["label/a", 5, 50, 50].numpy() == 9 ds["image", 5, 4, 120:200, 150:300, :] = 3 * np.ones((80, 150, 3), "uint8") assert (ds["image", 5, 4, 120:200, 150:300, :].numpy() == 3 * np.ones( (80, 150, 3), "uint8")).all()
def test_tensor_repr():
    """Check the ``repr`` of a default ``Tensor`` and of one with explicit arguments."""
    default_tensor = Tensor()
    unicode_tensor = Tensor(shape=(5000,), dtype="<U20")
    expectations = [
        (default_tensor, "Tensor(shape=(None,), dtype='float64')"),
        (unicode_tensor, "Tensor(shape=(5000,), dtype='<U20')"),
    ]
    for tensor, expected_text in expectations:
        assert repr(tensor) == expected_text
def test_dataset_bug_2(url="./data/test/dataset", token=None):
    """Assign a one-element list of arrays to a length-one slice of ``"image"``."""
    image_schema = {"image": Tensor((100, 100), "uint8")}
    ds = Dataset(url, token=token, shape=(10000,), mode="w", schema=image_schema)
    blank_image = np.zeros((100, 100))
    ds["image", slice(0, 1)] = [blank_image]
def test_tensor_init():
    """A scalar ``shape`` combined with a two-element ``max_shape`` raises ``ValueError``."""
    mismatched_args = {"shape": 2, "max_shape": (2, 2)}
    with pytest.raises(ValueError):
        Tensor(**mismatched_args)
def test_dataset_no_shape(url="./data/test/dataset", token=None):
    """Construct a ``Tensor`` whose ``shape`` and ``max_shape`` differ in the last axis.

    A ``ValueError`` from the constructor is tolerated; any other outcome,
    including no exception at all, lets the test pass. ``url`` and ``token``
    are accepted but not used.
    """
    tensor_args = {"shape": (120, 120, 3), "max_shape": (120, 120, 4)}
    try:
        Tensor(**tensor_args)
    except ValueError:
        # Deliberately ignored, as in the original test.
        pass
give 'max_shape' arguement a maximum possible size of image. """ "image": schema.Image(shape=(None, None, 3), max_shape=(1920, 1920, 3), dtype="uint8"), "isValidation": "float64", "img_paths": Text(shape=(None, ), max_shape=(15, )), "img_width": "int32", "img_height": "int32", "objpos": Tensor(max_shape=(100, ), dtype="float64"), """ 'joint_self' has nested list structure """ "joint_self": Tensor(shape=(None, None), max_shape=(100, 100), dtype="float64"), "scale_provided": "float64", "annolist_index": "int32", "people_index": "int32", "numOtherPeople": "int32", } """