Example #1
def test_text_dataset_tokenizer():
    schema = {
        "names": Text(shape=(None, ), max_shape=(1000, ), dtype="int64"),
    }
    ds = Dataset("./data/test/testing_text",
                 mode="w",
                 schema=schema,
                 shape=(10, ),
                 tokenizer=True)
    text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
    ds["names", 4] = text + " 4"
    assert ds["names", 4].numpy() == text + " 4"
    ds["names"][5] = text + " 5"
    assert ds["names"][5].numpy() == text + " 5"
    dsv = ds[7:9]
    dsv["names", 0] = text + " 7"
    assert dsv["names", 0].numpy() == text + " 7"
    dsv["names"][1] = text + " 8"
    assert dsv["names"][1].numpy() == text + " 8"
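The snippets on this page are excerpts from the hub 1.x test suite and omit their module-level imports (Example #23 below shows one of the original import blocks verbatim). Most examples assume a preamble roughly like the following sketch; the exact module paths are an assumption based on hub 1.x:

import numpy as np
from hub import Dataset
from hub.schema import ClassLabel, Image, Tensor, Text
from hub.api.sharded_datasetview import ShardedDatasetView  # used in Examples #8 and #11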
Example #2
def test_dataset_view_lazy():
    dt = {
        "first": Tensor(shape=(2,)),
        "second": "float",
        "text": Text(shape=(None,), max_shape=(12,)),
    }
    url = "./data/test/dsv_lazy"
    ds = Dataset(schema=dt, shape=(4,), url=url, mode="w")
    ds["text", 3] = "hello world"
    ds["second", 2] = 3.14
    ds["first", 2] = np.array([5, 6])
    dsv = ds[2:]
    dsv.disable_lazy()
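    # With lazy mode off, indexing returns plain values directly (no .compute());
    # dsv starts at ds[2], so dsv indices are offset by 2.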
    assert dsv["text", 1] == "hello world"
    assert dsv["second", 0] == 3.14
    assert (dsv["first", 0] == np.array([5, 6])).all()
    dsv.enable_lazy()
    assert dsv["text", 1].compute() == "hello world"
    assert dsv["second", 0].compute() == 3.14
    assert (dsv["first", 0].compute() == np.array([5, 6])).all()
Example #3
def test_dataset(url="./data/test/dataset", token=None, public=True):
    ds = Dataset(
        url, token=token, shape=(10000,), mode="w", schema=my_schema, public=public
    )

    sds = ds[5]
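    # Subpaths and indices mix freely: "label/a" followed by indices addresses
    # the same element as indices followed by the component "a".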
    sds["label/a", 50, 50] = 2
    assert sds["label", 50, 50, "a"].numpy() == 2

    ds["image", 5, 4, 100:200, 150:300, :] = np.ones((100, 150, 3), "uint8")
    assert (
        ds["image", 5, 4, 100:200, 150:300, :].numpy()
        == np.ones((100, 150, 3), "uint8")
    ).all()

    ds["image", 8, 6, 500:550, 700:730] = np.ones((50, 30, 3))
    subds = ds[3:15]
    subsubds = subds[4:9]
    assert (
        subsubds["image", 1, 6, 500:550, 700:730].numpy() == np.ones((50, 30, 3))
    ).all()

    subds = ds[5:7]
    ds["image", 6, 3:5, 100:135, 700:720] = 5 * np.ones((2, 35, 20, 3))

    assert (
        subds["image", 1, 3:5, 100:135, 700:720].numpy() == 5 * np.ones((2, 35, 20, 3))
    ).all()

    ds["label", "c"] = 4 * np.ones((10000, 5, 3), "uint8")
    assert (ds["label/c"].numpy() == 4 * np.ones((10000, 5, 3), "uint8")).all()

    ds["label", "c", 2, 4] = 6 * np.ones((3))
    sds = ds["label", "c"]
    ssds = sds[1:3, 4]
    sssds = ssds[1]
    assert (sssds.numpy() == 6 * np.ones((3))).all()
    ds.save()

    sds = ds["/label", 5:15, "c"]
    sds[2:4, 4, :] = 98 * np.ones((2, 3))
    assert (ds[7:9, 4, "label", "/c"].numpy() == 98 * np.ones((2, 3))).all()

    labels = ds["label", 1:5]
    d = labels["d"]
    e = d["e"]
    e[:] = 77 * np.ones((4, 5, 3))
    assert (e.numpy() == 77 * np.ones((4, 5, 3))).all()
    ds.close()
Example #4
def test_dataset_enter_exit():
    with Dataset(
        "./data/test/dataset", token=None, shape=(10000,), mode="w", schema=my_schema
    ) as ds:
        sds = ds[5]
        sds["label/a", 50, 50] = 2
        assert sds["label", 50, 50, "a"].numpy() == 2

        ds["image", 5, 4, 100:200, 150:300, :] = np.ones((100, 150, 3), "uint8")
        assert (
            ds["image", 5, 4, 100:200, 150:300, :].numpy()
            == np.ones((100, 150, 3), "uint8")
        ).all()

        ds["image", 8, 6, 500:550, 700:730] = np.ones((50, 30, 3))
        subds = ds[3:15]
        subsubds = subds[4:9]
        assert (
            subsubds["image", 1, 6, 500:550, 700:730].numpy() == np.ones((50, 30, 3))
        ).all()
Example #5
def test_dataset_view_compute():
    dt = {
        "first": Tensor(shape=(2,)),
        "second": "float",
        "text": Text(shape=(None,), max_shape=(12,)),
    }
    url = "./data/test/dsv_compute"
    ds = Dataset(schema=dt, shape=(4,), url=url, mode="w")
    ds["text", 3] = "hello world"
    ds["second", 2] = 3.14
    ds["first", 2] = np.array([5, 6])
    dsv = ds[2:]
    comp = dsv.compute()
    comp0 = comp[0]
    assert (comp0["first"] == np.array([5, 6])).all()
    assert comp0["second"] == 3.14
    assert comp0["text"] == ""
    comp1 = comp[1]
    assert (comp1["first"] == np.array([0, 0])).all()
    assert comp1["second"] == 0
    assert comp1["text"] == "hello world"
Example #6
def main():
    schema = {
        "image": Image(shape=(None, None), max_shape=(28, 28)),
        "label": ClassLabel(num_classes=10),
    }
    path = "./data/examples/new_api_intro2"

    ds = Dataset(path, shape=(10, ), mode="w", schema=schema)
    print(len(ds))
    for i in range(len(ds)):
        with Timer("writing single element"):
            ds["image", i] = np.ones((28, 28), dtype="uint8")
            ds["label", i] = 3

    ds.resize_shape(200)
    print(ds.shape)
    print(ds["label", 100:110].numpy())
    with Timer("Committing"):
        ds.flush()

    ds = Dataset(path)
    print(ds.schema)
    print(ds["image", 0].compute())
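Timer, used above and again in Example #29, is a small timing helper from hub's utilities. If it's unavailable, a minimal stand-in context manager with the same usage (a sketch, not the library's implementation):

import time
from contextlib import contextmanager

@contextmanager
def Timer(text):
    # Print how long the wrapped block took, labeled with `text`.
    start = time.time()
    try:
        yield
    finally:
        print(f"{text}: {time.time() - start:.3f}s")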
Example #7
def test_pickleability(url="./data/test/test_dataset_dynamic_shaped"):
    schema = {
        "first": Tensor(
            shape=(None, None),
            dtype="int32",
            max_shape=(100, 100),
            chunks=(100,),
        )
    }
    ds = Dataset(
        url=url,
        token=None,
        shape=(1000,),
        mode="w",
        schema=schema,
    )

    ds["first"][0] = np.ones((10, 10))

    pickled_ds = cloudpickle.dumps(ds)
    new_ds = pickle.loads(pickled_ds)
    assert np.all(new_ds["first"][0].compute() == ds["first"][0].compute())
Example #8
def test_sharded_dataset():
    dt = {"first": "float", "second": "float"}
    datasets = [
        Dataset(schema=dt, shape=(10,), url=f"./data/test/test_dataset/{i}", mode="w")
        for i in range(4)
    ]

    ds = ShardedDatasetView(datasets)

    ds[0]["first"] = 2.3
    assert ds[0]["second"].numpy() != 2.3
    assert ds[30]["first"].numpy() == 0
    assert len(ds) == 40
    assert ds.shape == (40,)
    assert type(ds.schema) == SchemaDict
    assert ds.__repr__() == "ShardedDatasetView(shape=(40,))"
    ds[4, "first"] = 3
    for _ in ds:
        pass

    ds2 = ShardedDatasetView([])
    assert ds2.identify_shard(5) == (0, 0)
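identify_shard maps a global sample index to a (shard id, offset of that shard's first sample) pair; with no shards it falls back to (0, 0), as asserted above. A sketch of those semantics, not the library's exact code:

def identify_shard(shards, index):
    # Walk the shards, accumulating lengths, until the shard
    # containing `index` is found; return (shard_id, shard_start).
    offset = 0
    for shard_id, shard in enumerate(shards):
        if index < offset + len(shard):
            return shard_id, offset
        offset += len(shard)
    return 0, 0  # empty or out-of-range: matches the assert above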
Example #9
def test_datasetview_get_dictionary():
    ds = Dataset(
        schema=my_schema,
        shape=(20,),
        url="./data/test/datasetview_get_dictionary",
        mode="w",
    )
    ds["label", 5, "a"] = 5 * np.ones((100, 200))
    ds["label", 5, "d", "e"] = 3 * np.ones((5, 3))
    dsv = ds[2:10]
    dsv.disable_lazy()
    dic = dsv[3, "label"]
    assert (dic["a"] == 5 * np.ones((100, 200))).all()
    assert (dic["d"]["e"] == 3 * np.ones((5, 3))).all()
    dsv.enable_lazy()
    ds["label", "a"] = 9 * np.ones((20, 100, 200))
    ds["label", "d", "e"] = 11 * np.ones((20, 5, 3))
    dic2 = dsv["label"]
    assert (dic2["a"].compute() == 9 * np.ones((8, 100, 200))).all()
    assert (dic2["d"]["e"].compute() == 11 * np.ones((8, 5, 3))).all()
    dic3 = ds["label"]
    assert (dic3["a"].compute() == 9 * np.ones((20, 100, 200))).all()
    assert (dic3["d"]["e"].compute() == 11 * np.ones((20, 5, 3))).all()
Example #10
def test_dataset_assign_value():
    schema = {"text": Text(shape=(None,), dtype="int64", max_shape=(10,))}
    url = "./data/test/text_data"
    ds = Dataset(schema=schema, shape=(7,), url=url, mode="w")
    slice_ = slice(0, 5, None)
    key = "text"
    batch = [
        np.array("THTMLY2F9"),
        np.array("QUUVEU2IU"),
        np.array("8ZUFCYWKD"),
        "H9EDFAGHB",
        "WDLDYN6XG",
    ]
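    # numpy string scalars and plain Python strings are both accepted as values.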
    ds[key, slice_] = batch
    ds[key][5] = np.array("GHLSGBFF8")
    ds[key][6] = "YGFJN75NF"
    assert ds["text", 0].compute() == "THTMLY2F9"
    assert ds["text", 1].compute() == "QUUVEU2IU"
    assert ds["text", 2].compute() == "8ZUFCYWKD"
    assert ds["text", 3].compute() == "H9EDFAGHB"
    assert ds["text", 4].compute() == "WDLDYN6XG"
    assert ds["text", 5].compute() == "GHLSGBFF8"
    assert ds["text", 6].compute() == "YGFJN75NF"
Example #11
def test_sharded_dataset_with_views():
    schema = {"first": "float", "second": "float"}
    ds = Dataset("./data/test_sharded_ds", shape=(10,), schema=schema, mode="w")
    for i in range(10):
        ds[i, "first"] = i
        ds[i, "second"] = 2 * i + 1

    dsv = ds[3:5]
    dsv2 = ds[1]
    dsv3 = ds[8:]
    datasets = [dsv, ds, dsv2, dsv3]
    sharded_ds = ShardedDatasetView(datasets)
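    # Shard lengths: 2 (ds[3:5]) + 10 (ds) + 1 (ds[1]) + 2 (ds[8:]) = 15 samples;
    # global indices map to shard-local offsets in that order.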
    for i in range(2):
        assert sharded_ds[i, "first"].compute() == i + 3
        assert sharded_ds[i, "second"].compute() == 2 * (i + 3) + 1
    for i in range(2, 12):
        assert sharded_ds[i, "first"].compute() == i - 2
        assert sharded_ds[i, "second"].compute() == 2 * (i - 2) + 1
    assert sharded_ds[12, "first"].compute() == 1
    assert sharded_ds[12, "second"].compute() == 3
    for i in range(13, 15):
        assert sharded_ds[i, "first"].compute() == i - 5
        assert sharded_ds[i, "second"].compute() == 2 * (i - 5) + 1
Example #12
def test_dataset_dynamic_shaped():
    schema = {
        "first": Tensor(
            shape=(None, None),
            dtype="int32",
            max_shape=(100, 100),
            chunks=(100,),
        )
    }
    ds = Dataset(
        "./data/test/test_dataset_dynamic_shaped",
        token=None,
        shape=(1000,),
        mode="w",
        schema=schema,
    )

    ds["first", 50, 50:60, 50:60] = np.ones((10, 10), "int32")
    assert (ds["first", 50, 50:60, 50:60].numpy() == np.ones((10, 10), "int32")).all()

    ds["first", 0, :10, :10] = np.ones((10, 10), "int32")
    ds["first", 0, 10:20, 10:20] = 5 * np.ones((10, 10), "int32")
    assert (ds["first", 0, 0:10, 0:10].numpy() == np.ones((10, 10), "int32")).all()
Example #13
def test_dataset_dynamic_shaped_slicing():
    schema = {
        "first": Tensor(
            shape=(None, None),
            dtype="int32",
            max_shape=(100, 100),
            chunks=(100,),
        )
    }
    ds = Dataset(
        "./data/test/test_dataset_dynamic_shaped",
        token=None,
        shape=(100,),
        mode="w",
        schema=schema,
    )

    for i in range(100):
        ds["first", i] = i * np.ones((i, i))
    items = ds["first", 0:100].compute()
    for i in range(100):
        assert (items[i] == i * np.ones((i, i))).all()

    assert (ds["first", 1:2].compute()[0] == np.ones((1, 1))).all()
Example #14
def main():
    # Tag format: {Username}/{Dataset}
    tag = "davitb/basic11"

    # Create dataset
    ds = Dataset(
        tag,
        shape=(4, ),
        schema={
            "image": schema.Tensor((512, 512), dtype="float"),
            "label": schema.Tensor((512, 512), dtype="float"),
        },
    )

    # Upload Data
    ds["image"][:] = np.ones((4, 512, 512))
    ds["label"][:] = np.ones((4, 512, 512))
    ds.commit()

    # Load the data
    ds = Dataset(tag)
    print(ds["image"][0].compute())
Example #15
def test_dataset_copy_gcs_s3():
    ds = Dataset(
        "s3://snark-test/cp_original_ds_s3_2_a", shape=(100,), schema=simple_schema
    )
    DS2_PATH = "gcs://snark-test/cp_copy_dataset_gcs_2_a"
    DS3_PATH = "s3://snark-test/cp_copy_ds_s3_3_a"
    for i in range(100):
        ds["num", i] = 2 * i

    try:
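        # copy() raises if the target path already holds a dataset
        # (e.g. leftovers from an earlier run); delete it and retry.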
        ds2 = ds.copy(DS2_PATH)
    except Exception:
        dsi = Dataset(DS2_PATH)
        dsi.delete()
        ds2 = ds.copy(DS2_PATH)

    try:
        ds3 = ds2.copy(DS3_PATH)
    except Exception:
        dsi = Dataset(DS3_PATH)
        dsi.delete()
        ds3 = ds2.copy(DS3_PATH)
    for i in range(100):
        assert ds2["num", i].compute() == 2 * i
        assert ds3["num", i].compute() == 2 * i
    ds.delete()
    ds2.delete()
    ds3.delete()
Example #16
def test_dataset_copy_hub_local():
    password = os.getenv("ACTIVELOOP_HUB_PASSWORD")
    login_fn("testingacc", password)
    ds = Dataset("testingacc/cp_original_ds_hub_1", shape=(100,), schema=simple_schema)
    DS2_PATH = "./data/testing/cp_copy_ds_local_5"
    DS3_PATH = "testingacc/cp_copy_dataset_testing_2"
    for i in range(100):
        ds["num", i] = 2 * i
    try:
        ds2 = ds.copy(DS2_PATH)
    except Exception:
        dsi = Dataset(DS2_PATH)
        dsi.delete()
        ds2 = ds.copy(DS2_PATH)

    try:
        ds3 = ds2.copy(DS3_PATH)
    except Exception:
        dsi = Dataset(DS3_PATH)
        dsi.delete()
        ds3 = ds2.copy(DS3_PATH)

    for i in range(100):
        assert ds2["num", i].compute() == 2 * i
        assert ds3["num", i].compute() == 2 * i
    ds.delete()
    ds2.delete()
    ds3.delete()
Example #17
def test_dataset_copy_azure_local():
    token = {"account_key": os.getenv("ACCOUNT_KEY")}
    ds = Dataset(
        "https://activeloop.blob.core.windows.net/activeloop-hub/cp_original_test_ds_azure_1",
        token=token,
        shape=(100,),
        schema=simple_schema,
    )
    DS2_PATH = "./data/testing/cp_copy_ds_local_4"
    DS3_PATH = "https://activeloop.blob.core.windows.net/activeloop-hub/cp_copy_test_ds_azure_2"
    for i in range(100):
        ds["num", i] = 2 * i
    try:
        ds2 = ds.copy(DS2_PATH)
    except Exception:
        dsi = Dataset(DS2_PATH)
        dsi.delete()
        ds2 = ds.copy(DS2_PATH)

    try:
        ds3 = ds2.copy(
            DS3_PATH,
            token=token,
        )
    except Exception:
        dsi = Dataset(
            DS3_PATH,
            token=token,
        )
        dsi.delete()
        ds3 = ds2.copy(
            DS3_PATH,
            token=token,
        )
    for i in range(100):
        assert ds2["num", i].compute() == 2 * i
        assert ds3["num", i].compute() == 2 * i
    ds.delete()
    ds2.delete()
    ds3.delete()
Example #18
def benchmark_iterate_hub_tensorflow_setup(dataset_name, batch_size, prefetch_factor):
    dset = Dataset(dataset_name, cache=False, storage_cache=False, mode="r")

    loader = dset.to_tensorflow().batch(batch_size).prefetch(prefetch_factor)
    return (loader,)
Example #19
def test_dataset_copy_s3_local():
    ds = Dataset(
        "./data/testing/cp_original_data_local", shape=(100,), schema=simple_schema
    )
    DS2_PATH = "s3://snark-test/cp_copy_data_s3_1_a"
    DS3_PATH = "./data/testing/cp_copy_data_local_1"
    for i in range(100):
        ds["num", i] = 2 * i
    try:
        ds2 = ds.copy(DS2_PATH)
    except Exception:
        dsi = Dataset(DS2_PATH)
        dsi.delete()
        ds2 = ds.copy(DS2_PATH)
    try:
        ds3 = ds2.copy(DS3_PATH)
    except Exception:
        dsi = Dataset(DS3_PATH)
        dsi.delete()
        ds3 = ds2.copy(DS3_PATH)
    for i in range(100):
        assert ds2["num", i].compute() == 2 * i
        assert ds3["num", i].compute() == 2 * i
    ds.delete()
    ds2.delete()
    ds3.delete()
Example #20
def test_dataset2():
    dt = {"first": "float", "second": "float"}
    ds = Dataset(schema=dt, shape=(2,), url="./data/test/test_dataset2", mode="w")

    ds["first"][0] = 2.3
    assert ds["second"][0].numpy() != 2.3
Example #21
def test_dataset_append_and_read():
    dt = {"first": "float", "second": "float"}
    os.makedirs("./data/test/test_dataset_append_and_read", exist_ok=True)
    shutil.rmtree("./data/test/test_dataset_append_and_read")

    ds = Dataset(
        schema=dt,
        shape=(2,),
        url="./data/test/test_dataset_append_and_read",
        mode="a",
    )

    ds["first"][0] = 2.3
    ds.meta_information["description"] = "This is my description"
    assert ds.meta_information["description"] == "This is my description"
    assert ds["second"][0].numpy() != 2.3
    ds.close()

    ds = Dataset(
        url="./data/test/test_dataset_append_and_read",
        mode="r",
    )
    assert ds.meta_information["description"] == "This is my description"
    ds.meta_information["hello"] = 5
    ds.delete()
    ds.close()
Example #22
def test_dataset_change_schema():
    schema = {
        "abc": "uint8",
        "def": {
            "ghi": Tensor((100, 100)),
            "rst": Tensor((100, 100, 100)),
        },
    }
    ds = Dataset("./data/test_schema_change", schema=schema, shape=(100, ))
    new_schema_1 = {
        "abc": "uint8",
        "def": {
            "ghi": Tensor((200, 100)),
            "rst": Tensor((100, 100, 100)),
        },
    }
    new_schema_2 = {
        "abrs": "uint8",
        "def": {
            "ghi": Tensor((100, 100)),
            "rst": Tensor((100, 100, 100)),
        },
    }
    new_schema_3 = {
        "abc": "uint8",
        "def": {
            "ghijk": Tensor((100, 100)),
            "rst": Tensor((100, 100, 100)),
        },
    }
    new_schema_4 = {
        "abc": "uint16",
        "def": {
            "ghi": Tensor((100, 100)),
            "rst": Tensor((100, 100, 100)),
        },
    }
    new_schema_5 = {
        "abc": "uint8",
        "def": {
            "ghi": Tensor((100, 100, 3)),
            "rst": Tensor((100, 100, 100)),
        },
    }
    with pytest.raises(SchemaMismatchException):
        ds = Dataset("./data/test_schema_change",
                     schema=new_schema_1,
                     shape=(100, ))
    with pytest.raises(SchemaMismatchException):
        ds = Dataset("./data/test_schema_change",
                     schema=new_schema_2,
                     shape=(100, ))
    with pytest.raises(SchemaMismatchException):
        ds = Dataset("./data/test_schema_change",
                     schema=new_schema_3,
                     shape=(100, ))
    with pytest.raises(SchemaMismatchException):
        ds = Dataset("./data/test_schema_change",
                     schema=new_schema_4,
                     shape=(100, ))
    with pytest.raises(SchemaMismatchException):
        ds = Dataset("./data/test_schema_change",
                     schema=new_schema_5,
                     shape=(100, ))
Example #23
from hub import Dataset
from hub.api.datasetview import TensorView
from hub.exceptions import NoneValueException
from hub.schema import Tensor

import numpy as np
import pytest

my_schema = {
    "image": Tensor((None, None, None, None), "uint8", max_shape=(10, 1920, 1080, 4)),
    "label": float,
}

ds = Dataset("./data/test/dataset", shape=(100, ), mode="w", schema=my_schema)


def test_tensorview_init():
    with pytest.raises(NoneValueException):
        tensorview_object = TensorView(ds, subpath=None)
    with pytest.raises(NoneValueException):
        tensorview_object_2 = TensorView(dataset=None, subpath="image")


def test_tensorview_getitem():
    images_tensorview = ds["image"]
    with pytest.raises(IndexError):
        images_tensorview["7", 0:1920, 0:1080, 0:3].compute()

Example #24
def test_dataset_batch_write_2():
    schema = {"image": Image(shape=(None, None, 3), max_shape=(640, 640, 3))}
    ds = Dataset("./data/batch", shape=(100,), mode="w", schema=schema)

    ds["image", 0:14] = [np.ones((640 - i, 640, 3)) for i in range(14)]
Example #25
def test_dataset_bug_2(url="./data/test/dataset", token=None):
    my_schema = {
        "image": Tensor((100, 100), "uint8"),
    }
    ds = Dataset(url, token=token, shape=(10000,), mode="w", schema=my_schema)
    ds["image", 0:1] = [np.zeros((100, 100))]
Example #26
def test_dataset_filter_2():
    my_schema = {
        "fname": Text((None,), max_shape=(10,)),
        "lname": Text((None,), max_shape=(10,)),
    }
    ds = Dataset("./data/tests/filtering", shape=(100,), schema=my_schema, mode="w")
    for i in range(100):
        ds["fname", i] = "John"
        ds["lname", i] = "Doe"

    for i in [1, 3, 6, 15, 63, 96, 75]:
        ds["fname", i] = "Active"

    for i in [15, 31, 25, 75, 3, 6]:
        ds["lname", i] = "loop"

    dsv_combined = ds.filter(
        lambda x: x["fname"].compute() == "Active" and x["lname"].compute() == "loop"
    )
    tsv_combined_fname = dsv_combined["fname"]
    tsv_combined_lname = dsv_combined["lname"]
    for item in dsv_combined:
        assert item.compute() == {"fname": "Active", "lname": "loop"}
    for item in tsv_combined_fname:
        assert item.compute() == "Active"
    for item in tsv_combined_lname:
        assert item.compute() == "loop"
    dsv_1 = ds.filter(lambda x: x["fname"].compute() == "Active")
    dsv_2 = dsv_1.filter(lambda x: x["lname"].compute() == "loop")
    for item in dsv_1:
        assert item.compute()["fname"] == "Active"
    tsv_1 = dsv_1["fname"]
    tsv_2 = dsv_2["lname"]
    for item in tsv_1:
        assert item.compute() == "Active"
    for item in tsv_2:
        assert item.compute() == "loop"
    for item in dsv_2:
        assert item.compute() == {"fname": "Active", "lname": "loop"}
    assert dsv_combined.indexes == [3, 6, 15, 75]
    assert dsv_1.indexes == [1, 3, 6, 15, 63, 75, 96]
    assert dsv_2.indexes == [3, 6, 15, 75]

    dsv_3 = ds.filter(lambda x: x["lname"].compute() == "loop")
    dsv_4 = dsv_3.filter(lambda x: x["fname"].compute() == "Active")
    for item in dsv_3:
        assert item.compute()["lname"] == "loop"
    for item in dsv_4:
        assert item.compute() == {"fname": "Active", "lname": "loop"}
    assert dsv_3.indexes == [3, 6, 15, 25, 31, 75]
    assert dsv_4.indexes == [3, 6, 15, 75]

    my_schema2 = {
        "fname": Text((None,), max_shape=(10,)),
        "lname": Text((None,), max_shape=(10,)),
        "image": Image((1920, 1080, 3)),
    }
    ds = Dataset("./data/tests/filtering2", shape=(100,), schema=my_schema2, mode="w")
    with pytest.raises(KeyError):
        ds.filter(lambda x: (x["random"].compute() == np.ones((1920, 1080, 3))).all())

    for i in [1, 3, 6, 15, 63, 96, 75]:
        ds["fname", i] = "Active"
    dsv = ds.filter(lambda x: x["fname"].compute() == "Active")
    with pytest.raises(KeyError):
        dsv.filter(lambda x: (x["random"].compute() == np.ones((1920, 1080, 3))).all())
Example #27
def test_dataset_copy_exception():
    ds = Dataset("./data/test_data_cp", shape=(100,), schema=simple_schema)
    DS_PATH = "./data/test_data_cp_2"
    ds2 = Dataset(DS_PATH, shape=(100,), schema=simple_schema)
    for i in range(100):
        ds["num", i] = i
        ds2["num", i] = 2 * i
    ds.flush()
    ds2.flush()
    with pytest.raises(DirectoryNotEmptyException):
        ds3 = ds.copy(DS_PATH)
    ds.delete()
    ds2.delete()
Example #28
File: ray.py Project: x213212/Hub
    def upload(
        self,
        results,
        url: str,
        token: dict,
        progressbar: bool = True,
        public: bool = True,
    ):
        """Batchified upload of results.
        For each tensor batchify based on its chunk and upload.
        If tensor is dynamic then still upload element by element.

        Parameters
        ----------
        dataset: hub.Dataset
            Dataset object that should be written to
        results:
            Output of transform function
        progressbar: bool
        public: bool, optional
            only applicable if using hub storage, ignored otherwise
            setting this to False allows only the user who created it to access the dataset and
            the dataset won't be visible in the visualizer to the public
        Returns
        ----------
        ds: hub.Dataset
            Uploaded dataset
        """
        if len(list(results.values())) == 0:
            shape = (0, )
        else:
            shape = (len(list(results.values())[0]), )

        ds = Dataset(
            url,
            mode="w",
            shape=shape,
            schema=self.schema,
            token=token,
            cache=False,
            public=public,
        )

        tasks = []
        for key, value in results.items():

            length = ds[key].chunksize[0]
            value = get_value(value)
            value = str_to_int(value, ds.tokenizer)
            batched_values = batchify(value, length)
            chunk_id = list(range(len(batched_values)))
            index_batched_values = list(zip(chunk_id, batched_values))

            ds._tensors[f"/{key}"].disable_dynamicness()

            results = [
                self.upload_chunk.remote(el, key=key, ds=ds)
                for el in index_batched_values
            ]
            tasks.extend(results)

        results = ray.get(tasks)
        self.set_dynamic_shapes(results, ds)
        ds.commit()
        return ds
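batchify above splits a tensor's flat list of values into chunk-sized batches so that each remote task uploads one chunk. A minimal sketch of such a helper (the real hub implementation may differ):

def batchify(values, batch_size):
    # Split `values` into consecutive slices of at most `batch_size` items.
    return [values[i : i + batch_size] for i in range(0, len(values), batch_size)]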
Example #29
def time_random_access(
    dataset_name="activeloop/mnist", offset=1000, span=1000, field="image"
):
    dset = Dataset(dataset_name, cache=False, storage_cache=False)
    with Timer(f"{dataset_name} read at offset {offset:03} of length {span:03}"):
        dset[field][offset : offset + span].compute()