Пример #1
0
 def class_label_to_hub(tf_dt, max_shape=None):
     """Build a hub ``ClassLabel`` from a class-label feature object.

     Args:
         tf_dt: Source feature object. If it has a ``_num_classes`` attribute
             its ``num_classes`` value is used; otherwise its ``names`` are.
         max_shape: Unused by this function.

     Returns:
         ClassLabel: Schema entry built from the class count or the names.
     """
     # Guard clause: without a stored class count, fall back to the names.
     if not hasattr(tf_dt, "_num_classes"):
         return ClassLabel(names=tf_dt.names)
     class_count = tf_dt.num_classes
     return ClassLabel(num_classes=class_count)
Пример #2
0
def test_class_label_value():
    """Exercise valid and invalid writes to ``ClassLabel`` tensors.

    NOTE(review): every ``try``/``except`` block below only inspects the
    exception when one is raised; if the assignment succeeds, the block is
    skipped and the test still passes.
    """
    ds = Dataset(
        "./data/tests/test_check_label",
        mode="w",
        shape=(5, ),
        schema={
            "label": ClassLabel(names=["name1", "name2", "name3"]),
            "label/b": ClassLabel(num_classes=5),
        },
    )
    # Unguarded writes: a scalar over a slice (0:7 is wider than the dataset
    # length of 5), an integer array, a list of names, and a list mixing
    # indices with names.
    ds["label", 0:7] = 2
    ds["label", 0:2] = np.array([0, 1])
    ds["label", 0:3] = ["name1", "name2", "name3"]
    ds[0:3]["label"] = [0, "name2", 2]
    # 6 is outside the 5 classes declared for "label/b".
    try:
        ds["label/b", 0] = 6
    except Exception as ex:
        assert isinstance(ex, ClassLabelValueError)
    # The array holds 7, which is outside the 5 declared classes.
    try:
        ds[0:4]["label/b"] = np.array([0, 1, 2, 3, 7])
    except Exception as ex:
        assert isinstance(ex, ClassLabelValueError)
    # "name4" is not among the names declared for "label".
    try:
        ds["label", 4] = "name4"
    except Exception as ex:
        assert isinstance(ex, ClassLabelValueError)
    # A name string assigned to a label declared only with num_classes.
    try:
        ds[0]["label/b"] = ["name"]
    except Exception as ex:
        assert isinstance(ex, ValueError)
Пример #3
0
def test_check_label_name():
    """Check ``compute`` with and without ``label_name=True`` on a ClassLabel.

    Three of the five samples are written; the assertions expect the two
    unwritten samples to read back as index 0 / name "red".
    """
    label_schema = ClassLabel(names=["red", "green", "blue"])
    my_schema = {"label": label_schema}
    ds = Dataset("./data/test/dataset2", shape=(5,), mode="w", schema=my_schema)

    # (sample index, class index) pairs; samples 0 and 1 are written twice
    # with the same values.
    writes = ((0, 1), (1, 2), (0, 1), (1, 2), (2, 0))
    for sample_index, class_index in writes:
        ds["label", sample_index] = class_index

    expected_names = ["green", "blue", "red", "red", "red"]
    expected_indices = [1, 2, 0, 0, 0]

    # Whole dataset.
    named = ds.compute(label_name=True).tolist()
    assert named == [{"label": name} for name in expected_names]
    raw = ds.compute().tolist()
    assert raw == [{"label": index} for index in expected_indices]

    # Single sample.
    assert ds[1].compute(label_name=True) == {"label": "blue"}
    assert ds[1].compute() == {"label": 2}

    # Slice of samples.
    named_slice = ds[1:3].compute(label_name=True).tolist()
    assert named_slice == [{"label": "blue"}, {"label": "red"}]
    raw_slice = ds[1:3].compute().tolist()
    assert raw_slice == [{"label": 2}, {"label": 0}]
Пример #4
0
def test_class_label_value():
    """Exercise valid and invalid writes to scalar and multi-value labels.

    NOTE(review): every ``try``/``except`` block below only inspects the
    exception when one is raised; if the assignment succeeds, the block is
    skipped and the test still passes.
    """
    ds = Dataset(
        "./data/tests/test_check_label",
        mode="w",
        shape=(5, ),
        schema={
            "label":
            ClassLabel(names=["name1", "name2", "name3"]),
            "label/b":
            ClassLabel(num_classes=5),
            # Variable-length label tensor holding up to 3 labels per sample.
            "label_mult":
            ClassLabel(shape=(None, ),
                       max_shape=(3, ),
                       names=["name1", "name2", "name3"]),
        },
    )
    # Unguarded writes to the scalar label: integer array, list of names,
    # and a list mixing indices with names.
    ds["label", 0:2] = np.array([0, 1])
    ds["label", 0:3] = ["name1", "name2", "name3"]
    ds[0:3]["label"] = [0, "name2", 2]
    # Unguarded writes to the multi-value label: name arrays, a single name,
    # a list of per-sample arrays, integer arrays and nested integer lists.
    ds[0]["label_mult"] = np.array(["name1", "name3"])
    ds["label_mult", 1] = "name2"
    ds["label_mult", 2:4] = [np.array(["name2", "name3"]), np.array(["name1"])]
    ds["label_mult", 3] = np.array([1, 0, 2])
    ds["label_mult", 4] = [1]
    ds["label_mult", 3:5] = [[2, 2], [0]]
    # Slice 0:7 is wider than the dataset length of 5.
    try:
        ds["label", 0:7] = 2
    except Exception as ex:
        assert isinstance(ex, hub.exceptions.ValueShapeError)
    # 6 is outside the 5 classes declared for "label/b".
    try:
        ds["label/b", 0] = 6
    except Exception as ex:
        assert isinstance(ex, ClassLabelValueError)
    # The array holds 7, which is outside the 5 declared classes.
    try:
        ds[0:4]["label/b"] = np.array([0, 1, 2, 3, 7])
    except Exception as ex:
        assert isinstance(ex, ClassLabelValueError)
    # "name4" is not among the names declared for "label".
    try:
        ds["label", 4] = "name4"
    except Exception as ex:
        assert isinstance(ex, ClassLabelValueError)
    # A name string assigned to a label declared only with num_classes.
    try:
        ds[0]["label/b"] = ["name"]
    except Exception as ex:
        assert isinstance(ex, ValueError)
Пример #5
0
def convert_str_arr_to_int(array: Union[List, np.ndarray], label: ClassLabel):
    """Replace class-name strings in ``array`` with their integer class ids.

    Args:
        array: A list or NumPy array whose elements may be class-name strings.
        label: Object providing ``str2int`` (name -> id) and ``names``.

    Returns:
        For a NumPy array with a string dtype, a new integer array of the
        same shape; it is ``int8`` whenever every id fits in ``int8`` and
        ``int64`` otherwise. For any other input, the same object, with
        string elements replaced in place.

    Raises:
        ClassLabelValueError: If ``label.str2int`` raises ``KeyError`` for a
            string element.
    """

    def _to_int(name):
        try:
            return label.str2int(name)
        except KeyError:
            raise ClassLabelValueError(label.names, name)

    if isinstance(array, np.ndarray) and array.dtype.type is np.str_:
        # Build the ids in a separate integer array. Writing ints back into a
        # string array stringifies them and truncates to the array's item
        # width (e.g. 10 -> "1" in a "<U1" array), corrupting the ids.
        ids = np.array([_to_int(name) for name in array.ravel()], dtype="int64")
        ids = ids.reshape(array.shape)
        int8_info = np.iinfo(np.int8)
        # Keep the compact int8 result when it is lossless; otherwise stay
        # wide so ids above 127 are not wrapped or rejected.
        if ids.size == 0 or (int8_info.min <= ids.min() and ids.max() <= int8_info.max):
            ids = ids.astype("int8")
        return ids

    # Lists and non-string arrays: convert only the string elements, in place.
    for i, elem in enumerate(array):
        if isinstance(elem, str):
            array[i] = _to_int(elem)
    return array
Пример #6
0
def test_dataset_filter_4():
    """Filter on class 0 and read a slice of labels from the filtered view."""
    sample_count = 100
    schema = {
        "img": Image((None, None, 3), max_shape=(100, 100, 3)),
        "cl": ClassLabel(names=["cat", "dog", "horse"]),
    }
    ds = Dataset(
        "./data/tests/filtering_4",
        shape=(sample_count,),
        schema=schema,
        mode="w",
    )

    # The first ten samples get class 0, all others class 1.
    for index in range(sample_count):
        in_first_ten = index < 10
        ds["cl", index] = 0 if in_first_ten else 1
        ds["img", index] = index * np.ones((5, 6, 3))

    def is_class_zero(sample):
        return sample["cl"].compute() == 0

    ds_filtered = ds.filter(is_class_zero)
    filtered_labels = ds_filtered[3:8, "cl"].compute()
    assert (filtered_labels == np.zeros((5,))).all()
Пример #7
0
        def make_schema(path_to_dir, labels, dtype):
            """| Build the ``label``/``image`` schema for a directory.

            ``max_shape`` of the image tensor comes from
            ``get_max_shape(path_to_dir)``. When ``labels`` is ``None`` the
            class names are the entries of ``path_to_dir``.
            """
            max_shape = get_max_shape(path_to_dir)

            if labels is not None:
                # NOTE(review): ``labels`` is passed positionally here while
                # the other branch uses ``names=`` -- confirm which
                # ClassLabel parameter this binds to.
                label_schema = ClassLabel(labels)
            else:
                label_schema = ClassLabel(names=os.listdir(path_to_dir))

            # All three image dimensions are left dynamic.
            image_schema = Tensor(
                shape=(None, None, None),
                max_shape=max_shape,
                dtype=dtype,
            )
            return {"label": label_schema, "image": image_schema}
Пример #8
0
def test_dataset_filter_3():
    """Filter by two different class values and check indexes and contents."""
    total = 100
    schema = {
        "img": Image((None, None, 3), max_shape=(100, 100, 3)),
        "cl": ClassLabel(names=["cat", "dog", "horse"]),
    }
    ds = Dataset("./data/tests/filtering_3", shape=(total,), schema=schema, mode="w")

    # Every fifth sample is class 0, the rest class 1; sample 4 is then
    # overwritten with class 2.
    for idx in range(total):
        ds["cl", idx] = 1 if idx % 5 else 0
        ds["img", idx] = idx * np.ones((5, 6, 3))
    ds["cl", 4] = 2

    def has_class(target):
        """Return a predicate matching samples whose class equals ``target``."""
        return lambda sample: sample["cl"].compute() == target

    ds_filtered = ds.filter(has_class(0))
    assert ds_filtered.indexes == list(range(0, total, 5))

    ds_filtered_2 = ds.filter(has_class(2))
    batch_images = ds_filtered_2["img"].compute()
    assert (batch_images == 4 * np.ones((1, 5, 6, 3))).all()
    for item in ds_filtered_2:
        single_image = item["img"].compute()
        assert (single_image == 4 * np.ones((5, 6, 3))).all()
        assert item["cl"].compute() == 2
Пример #9
0
def check_class_label(value: Union[np.ndarray, list], label: ClassLabel):
    """Check if value can be assigned to predefined ClassLabel.

    String elements are converted to class ids through ``label.str2int``
    (in place when ``value`` is a mutable sequence), and every resulting id
    must lie in ``0 .. label.num_classes - 1``.

    Args:
        value: A single label (int or name) or an iterable of labels.
        label: Object providing ``str2int``, ``names`` and ``num_classes``.

    Returns:
        The single label when exactly one was given, otherwise the (possibly
        updated) collection of labels. An empty collection is returned as is.

    Raises:
        ClassLabelValueError: If a name is unknown to ``label.str2int``
            (``KeyError``) or an id is outside the valid range.
    """
    if not isinstance(value, Iterable) or isinstance(value, str):
        assign_class_labels = [value]
    else:
        assign_class_labels = value
    for i, assign_class_label in enumerate(assign_class_labels):
        if isinstance(assign_class_label, str):
            try:
                assign_class_labels[i] = label.str2int(assign_class_label)
            except KeyError:
                raise ClassLabelValueError(label.names, assign_class_label)

    # Nothing to range-check; min()/max() would raise on an empty sequence.
    if len(assign_class_labels) == 0:
        return assign_class_labels

    lowest, highest = min(assign_class_labels), max(assign_class_labels)
    if lowest < 0 or highest > label.num_classes - 1:
        # Report the value that is actually out of range, together with the
        # full valid range 0 .. num_classes - 1.
        offending = lowest if lowest < 0 else highest
        raise ClassLabelValueError(range(label.num_classes), offending)
    if len(assign_class_labels) == 1:
        return assign_class_labels[0]
    return assign_class_labels
Пример #10
0
def main():
    """Write a small image/label dataset, resize it, flush and reopen it."""
    schema = {
        "image": Image(shape=(None, None), max_shape=(28, 28)),
        "label": ClassLabel(num_classes=10),
    }
    path = "./data/examples/new_api_intro2"

    ds = Dataset(path, shape=(10, ), mode="w", schema=schema)
    print(len(ds))
    # Each iteration times one image write plus one label write together.
    for i in range(len(ds)):
        with Timer("writing single element"):
            ds["image", i] = np.ones((28, 28), dtype="uint8")
            ds["label", i] = 3

    # Presumably resizes the dataset to 200 samples -- TODO confirm; the
    # slice 100:110 printed below was never written explicitly above.
    ds.resize_shape(200)
    print(ds.shape)
    print(ds["label", 100:110].numpy())
    with Timer("Committing"):
        ds.flush()

    # Reopen from the same path without passing a mode, shape or schema.
    ds = Dataset(path)
    print(ds.schema)
    print(ds["image", 0].compute())
import numpy as np

import hub
from hub.schema import Image, ClassLabel
from hub.utils import Timer

# Schema used by main(): fixed 28x28 images with an explicit chunk shape of
# (1000, 28, 28), plus a 10-class label.
schema = {
    "image": Image((28, 28), chunks=(1000, 28, 28)),
    "label": ClassLabel(num_classes=10),
}


def main():
    """Time writing ``sample_count`` random images in batches of ``step``.

    NOTE(review): in the code shown here only the ``image`` tensor is
    written; the ``label`` tensor is never assigned and the dataset is not
    flushed.
    """
    sample_count = 70000
    step = 10
    with Timer("Time"):

        ds = hub.Dataset(
            "./data/examples/mnist_upload_speed_benchmark",
            mode="w",
            schema=schema,
            shape=(sample_count, ),
            # 2**26 = 67,108,864; presumably a cache size in bytes -- TODO confirm.
            cache=2**26,
        )

        # One batch of random uint8 images, reused for every write below.
        arr = (np.random.rand(step, 28, 28) * 100).astype("uint8")

        for i in range(0, sample_count, step):
            # with Timer(f"Sample {i}"):
            ds["image", i:i + step] = arr
Пример #12
0
"""
import glob
import os

import numpy as np
import PIL.Image

import hub
from hub.schema import ClassLabel, Image

# Create a new dataset
# Images have three dynamic dimensions bounded by 3000 x 3000 x 3; the label
# has two classes.
schema = {
    "image":
    Image(shape=(None, None, None), max_shape=(3000, 3000, 3), dtype="uint8"),
    "label":
    ClassLabel(num_classes=2),
}
tag = "/tmp/chest_xray/train"
# Number of samples the dataset is created with.
len_ds = 5216
ds = hub.Dataset(tag, mode="w+", shape=(len_ds, ), schema=schema)


# Transform function
@hub.transform(schema=schema, scheduler="threaded", workers=8)
def fill_ds(filename):
    if os.path.basename(os.path.dirname(filename)) == "NORMAL":
        label = 0
    else:
        label = 1
    image = np.array(PIL.Image.open(filename))
    if len(image.shape) == 2:
Пример #13
0
If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
"""

from hub import Dataset
from hub.api.datasetview import TensorView
from hub.exceptions import NoneValueException
from hub.schema import Tensor, ClassLabel

import numpy as np
import pytest

# Two schemas that differ only in how the label is declared: by class count
# (my_schema) or by explicit class names (my_schema2).
my_schema = {
    "image":
    Tensor((None, None, None, None), "uint8", max_shape=(10, 1920, 1080, 4)),
    "label":
    ClassLabel(num_classes=3),
}
my_schema2 = {
    "image":
    Tensor((None, None, None, None), "uint8", max_shape=(10, 1920, 1080, 4)),
    "label":
    ClassLabel(names=["red", "green", "blue"]),
}

# Module-level datasets; both are created (mode="w") when this module is
# imported.
ds = Dataset("./data/test/dataset", shape=(100, ), mode="w", schema=my_schema)
ds2 = Dataset("./data/test/dataset2", shape=(5, ), mode="w", schema=my_schema2)

# Seed a few label values by integer class index.
ds["label", 0] = 1
ds["label", 1] = 2
ds["label", 2] = 0
ds2["label", 0] = 1
Пример #14
0
def _from_supervisely(project, scheduler: str = "single", workers: int = 1):
    """Convert a Supervisely image or video project into a hub transform.

    The project is scanned once to collect label names, the largest item
    shape, and the maximum number of boxes/polygons per item. A schema is
    built from that scan and a ``hub.transform`` function that loads each
    item is applied to all project items.

    Args:
        project: Either a path (``str``) to a project directory containing
            ``meta.json``, or an already instantiated project object
            exposing ``meta`` and ``datasets``.
        scheduler: Scheduler name forwarded to ``hub.transform``.
        workers: Worker count forwarded to ``hub.transform``.

    Returns:
        The result of calling the ``hub.transform``-decorated function on
        the list of all project items.

    Raises:
        ModuleNotInstalledException: If ``supervisely_lib`` or ``skvideo``
            cannot be imported.
        ValueError: If the project's ``projectType`` is neither ``"images"``
            nor ``"videos"``.
    """
    import os  # local import: only needed to locate meta.json

    try:
        import supervisely_lib as sly
        from supervisely_lib.project import project as sly_image_project
        from supervisely_lib.project import video_project as sly_video_project
        from skvideo.io import FFmpegReader, vread
    except ModuleNotFoundError:
        raise ModuleNotInstalledException("supervisely")
    if isinstance(project, str):
        # os.path.join works whether or not the path ends with a separator.
        with open(os.path.join(project, "meta.json")) as meta_file:
            project_meta_dict = json.load(meta_file)
        instantiated = False
    else:
        project_meta_dict = project.meta.to_json()
        instantiated = True
    project_type = project_meta_dict["projectType"]
    mode = sly.OpenMode.READ

    def infer_image(paths):
        """Return (size, bboxes, masks, bbox classes, mask classes) for an item."""
        bboxes, masks = [], []
        classes_bb, classes_mask = [], []
        item_path, item_ann_path = paths

        # ``project`` is the instantiated project by the time this runs.
        ann = sly.Annotation.load_json_file(item_ann_path, project.meta)
        ann_dict = ann.to_json()
        sizes = (ann_dict["size"]["height"], ann_dict["size"]["width"])
        for obj in ann_dict["objects"]:
            if obj["geometryType"] == "rectangle":
                # Flatten the exterior points into one flat coordinate list.
                bboxes.append([
                    item for sublist in obj["points"]["exterior"]
                    for item in sublist
                ])
                classes_bb.append(obj["classTitle"])
            elif obj["geometryType"] == "polygon":
                # Rasterise the polygon into a (height, width) 0/1 mask.
                img = PIL.Image.new("L", (sizes[1], sizes[0]), 0)
                PIL.ImageDraw.Draw(img).polygon(
                    [tuple(point) for point in obj["points"]["exterior"]],
                    outline=1,
                    fill=1,
                )
                masks.append(np.array(img))
                classes_mask.append(obj["classTitle"])
        return sizes, bboxes, masks, classes_bb, classes_mask

    def infer_video(paths):
        """Return a 1-tuple holding the video's shape as reported by FFmpegReader."""
        item_path, item_ann_path = paths
        vreader = FFmpegReader(item_path)
        return (vreader.getShape(), )

    def infer_project(project, project_type, read_mode):
        """Return (project, blob schema class, per-item infer fn, zero shape)."""
        if project_type == "images":
            if not instantiated:
                project = sly_image_project.Project(project, read_mode)
            return project, Image, infer_image, (0, 0)
        if project_type == "videos":
            if not instantiated:
                project = sly_video_project.VideoProject(project, read_mode)
            return project, Video, infer_video, (0, 0, 0, 0)
        # Fail with a clear message instead of an opaque unpacking error.
        raise ValueError(
            "Unsupported Supervisely project type: {!r}".format(project_type))

    project, main_blob, infer_ds, max_shape = infer_project(
        project, project_type, mode)

    label_names = []
    max_num_bboxes = 0
    max_num_polys = 0
    masks = False
    datasets = project.datasets.items()
    uniform = True
    max_shape = np.asarray(max_shape)
    for ds in datasets:
        for item in ds:
            inf = infer_ds(ds.get_item_paths(item))
            if len(inf) > 1:
                if inf[3]:
                    label_names.extend(inf[3])
                    if len(inf[3]) > max_num_bboxes:
                        max_num_bboxes = len(inf[3])
                if inf[4]:
                    label_names.extend(inf[4])
                    if len(inf[4]) > max_num_polys:
                        max_num_polys = len(inf[4])
                if inf[2]:
                    masks = True
            shape = np.asarray(inf[0])
            # Compare against the running maximum BEFORE folding this item
            # in; otherwise an item larger than every previous one would
            # always equal the updated maximum and never be detected as a
            # shape change.
            if uniform and max_shape.any() and not np.array_equal(
                    shape, max_shape):
                uniform = False
            max_shape = np.maximum(shape, max_shape)
    label_names = list(np.unique(label_names))
    items = chain(*datasets)
    idatasets = iter(datasets)
    ds, i = next(idatasets), 0
    # A fixed "shape" is only used when every item has the same shape.
    key = "shape" if uniform else "max_shape"
    if project_type == "images":
        read = sly.imaging.image.read
        blob_shape = {key: (*max_shape.tolist(), 3)}
    elif project_type == "videos":
        read = vread
        blob_shape = {key: max_shape.tolist()}
        if key == "max_shape":
            blob_shape["shape"] = (None, None, None, 3)

    schema = {
        project_type: main_blob(**blob_shape),
    }
    if max_num_bboxes:
        schema["bbox"] = BBox(shape=(None, 4), max_shape=(max_num_bboxes, 4))
    if label_names:
        schema["label"] = ClassLabel(
            shape=(None, ),
            max_shape=(max(max_num_bboxes, max_num_polys), ),
            names=label_names,
        )
    if masks:
        schema["mask"] = Mask(shape=(None, None, None),
                              max_shape=(*max_shape.tolist(), 1))

    @hub.transform(schema=schema, scheduler=scheduler, workers=workers)
    def transformation(item):
        """Load one project item into a sample dict matching ``schema``."""
        # NOTE(review): ``ds``/``i`` form a shared cursor over the datasets,
        # which assumes items arrive in order -- confirm this holds for
        # schedulers other than "single".
        nonlocal i, ds
        sample = {}
        # ``while`` (not ``if``) so empty datasets are skipped as well.
        while i >= len(ds):
            ds, i = next(idatasets), 0
        item_path, item_ann_path = ds.get_item_paths(item)
        i += 1
        inferred = infer_ds((item_path, item_ann_path))
        sample[project_type] = read(item_path)
        # Video items yield a 1-tuple (shape only); only image items carry
        # the annotation fields.
        if len(inferred) > 1:
            _, bboxes, item_masks, classes_bbox, classes_mask = inferred
            if bboxes:
                sample["bbox"] = np.array(bboxes)
                sample["label"] = [
                    label_names.index(name) for name in classes_bbox
                ]
            if item_masks:
                # NOTE(review): only the first mask is stored, and "label" is
                # overwritten when an item has both boxes and polygons.
                sample["mask"] = np.expand_dims(item_masks[0], -1)
                sample["label"] = [
                    label_names.index(name) for name in classes_mask
                ]
        return sample

    return transformation(list(items))