def class_label_to_hub(tf_dt, max_shape=None):
    """Build a ``ClassLabel`` schema entry from the feature object ``tf_dt``.

    If ``tf_dt`` has a ``_num_classes`` attribute the label is created from
    ``tf_dt.num_classes``; otherwise it is created from ``tf_dt.names``.
    ``max_shape`` is accepted but not used by this converter.
    """
    if not hasattr(tf_dt, "_num_classes"):
        return ClassLabel(names=tf_dt.names)
    return ClassLabel(num_classes=tf_dt.num_classes)
def test_class_label_value():
    """Check which values a ``ClassLabel`` field accepts and which it rejects.

    Valid assignments (ints, arrays of ints, names, mixed lists) must go
    through; out-of-range ids and unknown names must raise. Each negative
    case uses ``try/except/else`` so the test fails when no exception is
    raised at all, instead of passing silently.
    """
    ds = Dataset(
        "./data/tests/test_check_label",
        mode="w",
        shape=(5,),
        schema={
            "label": ClassLabel(names=["name1", "name2", "name3"]),
            "label/b": ClassLabel(num_classes=5),
        },
    )

    # Assignments that are expected to succeed.
    ds["label", 0:7] = 2
    ds["label", 0:2] = np.array([0, 1])
    ds["label", 0:3] = ["name1", "name2", "name3"]
    ds[0:3]["label"] = [0, "name2", 2]

    # Class id 6 is outside the 5 declared classes.
    try:
        ds["label/b", 0] = 6
    except ClassLabelValueError:
        pass
    else:
        raise AssertionError("expected ClassLabelValueError for class id 6")

    # The array contains class id 7, which is outside the 5 declared classes.
    try:
        ds[0:4]["label/b"] = np.array([0, 1, 2, 3, 7])
    except ClassLabelValueError:
        pass
    else:
        raise AssertionError("expected ClassLabelValueError for class id 7")

    # "name4" is not one of the declared names.
    try:
        ds["label", 4] = "name4"
    except ClassLabelValueError:
        pass
    else:
        raise AssertionError("expected ClassLabelValueError for name 'name4'")

    # "label/b" was declared with num_classes only, so a name cannot be used.
    try:
        ds[0]["label/b"] = ["name"]
    except ValueError:
        pass
    else:
        raise AssertionError("expected ValueError for a name on 'label/b'")
def test_check_label_name():
    """Verify ``compute(label_name=True)`` returns names and ``compute()`` ids.

    Covers the whole dataset, a single sample and a slice.
    """
    schema = {"label": ClassLabel(names=["red", "green", "blue"])}
    ds = Dataset("./data/test/dataset2", shape=(5,), mode="w", schema=schema)

    # Same writes in the same order as a table; the first two are repeated.
    for index, class_id in ((0, 1), (1, 2), (0, 1), (1, 2), (2, 0)):
        ds["label", index] = class_id

    expected_names = [
        {"label": name} for name in ("green", "blue", "red", "red", "red")
    ]
    expected_ids = [{"label": class_id} for class_id in (1, 2, 0, 0, 0)]

    # Whole dataset.
    assert ds.compute(label_name=True).tolist() == expected_names
    assert ds.compute().tolist() == expected_ids

    # Single sample.
    assert ds[1].compute(label_name=True) == {"label": "blue"}
    assert ds[1].compute() == {"label": 2}

    # Slice.
    assert ds[1:3].compute(label_name=True).tolist() == expected_names[1:3]
    assert ds[1:3].compute().tolist() == expected_ids[1:3]
def test_class_label_value():
    """Check which values scalar and multi-label ``ClassLabel`` fields accept.

    Valid assignments (ints, names, arrays and lists of either) must go
    through; oversized slices, out-of-range ids and unknown names must
    raise. Each negative case uses ``try/except/else`` so the test fails
    when no exception is raised at all, instead of passing silently.
    """
    ds = Dataset(
        "./data/tests/test_check_label",
        mode="w",
        shape=(5,),
        schema={
            "label": ClassLabel(names=["name1", "name2", "name3"]),
            "label/b": ClassLabel(num_classes=5),
            "label_mult": ClassLabel(
                shape=(None,),
                max_shape=(3,),
                names=["name1", "name2", "name3"],
            ),
        },
    )

    # Assignments that are expected to succeed.
    ds["label", 0:2] = np.array([0, 1])
    ds["label", 0:3] = ["name1", "name2", "name3"]
    ds[0:3]["label"] = [0, "name2", 2]
    ds[0]["label_mult"] = np.array(["name1", "name3"])
    ds["label_mult", 1] = "name2"
    ds["label_mult", 2:4] = [np.array(["name2", "name3"]), np.array(["name1"])]
    ds["label_mult", 3] = np.array([1, 0, 2])
    ds["label_mult", 4] = [1]
    ds["label_mult", 3:5] = [[2, 2], [0]]

    # The slice 0:7 is longer than the 5-sample dataset.
    try:
        ds["label", 0:7] = 2
    except hub.exceptions.ValueShapeError:
        pass
    else:
        raise AssertionError("expected ValueShapeError for slice 0:7")

    # Class id 6 is outside the 5 declared classes.
    try:
        ds["label/b", 0] = 6
    except ClassLabelValueError:
        pass
    else:
        raise AssertionError("expected ClassLabelValueError for class id 6")

    # The array contains class id 7, which is outside the 5 declared classes.
    try:
        ds[0:4]["label/b"] = np.array([0, 1, 2, 3, 7])
    except ClassLabelValueError:
        pass
    else:
        raise AssertionError("expected ClassLabelValueError for class id 7")

    # "name4" is not one of the declared names.
    try:
        ds["label", 4] = "name4"
    except ClassLabelValueError:
        pass
    else:
        raise AssertionError("expected ClassLabelValueError for name 'name4'")

    # "label/b" was declared with num_classes only, so a name cannot be used.
    try:
        ds[0]["label/b"] = ["name"]
    except ValueError:
        pass
    else:
        raise AssertionError("expected ValueError for a name on 'label/b'")
def convert_str_arr_to_int(array: Union[List, np.ndarray], label: ClassLabel):
    """Replace class-name strings in ``array`` with their integer class ids.

    Args:
        array: a list or NumPy array whose elements may be class names.
        label: object providing ``str2int(name)`` and ``names``.

    Returns:
        For a NumPy unicode array, a new ``int8`` array of class ids.
        Otherwise ``array`` itself, with every string element replaced
        in place by its id.

    Raises:
        ClassLabelValueError: if ``label.str2int`` raises ``KeyError`` for
            one of the names.
    """

    def _to_id(name):
        # Translate one name, converting the lookup failure into the
        # library's own error while keeping the original cause attached.
        try:
            return label.str2int(name)
        except KeyError as err:
            raise ClassLabelValueError(label.names, name) from err

    is_str_ndarray = isinstance(array, np.ndarray) and array.dtype.type is np.str_

    if is_str_ndarray and array.ndim == 1:
        # A fixed-width unicode array silently truncates anything longer than
        # its item width, so writing id 10 into a "<U1" array would store "1".
        # Collect the ids separately instead of writing them back as text.
        # NOTE(review): int8 is kept from the original code; it cannot hold
        # ids above 127 - confirm whether larger label sets must be supported.
        return np.asarray([_to_id(name) for name in array], dtype="int8")

    for i, elem in enumerate(array):
        if isinstance(elem, str):
            array[i] = _to_id(elem)
    if is_str_ndarray:
        array = np.asarray(array, dtype="int8")
    return array
def test_dataset_filter_4():
    """Filter on class 0 and slice the filtered view.

    The first ten samples get class 0 and the rest class 1; samples 3..7 of
    the filtered view must therefore all be class 0.
    """
    schema = {
        "img": Image((None, None, 3), max_shape=(100, 100, 3)),
        "cl": ClassLabel(names=["cat", "dog", "horse"]),
    }
    ds = Dataset("./data/tests/filtering_4", shape=(100,), schema=schema, mode="w")

    for idx in range(100):
        class_id = 1 if idx >= 10 else 0
        ds["cl", idx] = class_id
        ds["img", idx] = idx * np.ones((5, 6, 3))

    ds_filtered = ds.filter(lambda sample: sample["cl"].compute() == 0)
    filtered_classes = ds_filtered[3:8, "cl"]
    assert (filtered_classes.compute() == np.zeros((5,))).all()
def make_schema(path_to_dir, labels, dtype):
    """| make_schema builds the ``{"label", "image"}`` schema for a directory.

    The image tensor has three dynamic dimensions bounded by
    ``get_max_shape(path_to_dir)``. When ``labels`` is ``None`` the class
    names are the entries of ``path_to_dir``; otherwise ``labels`` is handed
    to ``ClassLabel`` as given.
    """
    max_shape = get_max_shape(path_to_dir)

    if labels is not None:
        # NOTE(review): ``labels`` is passed positionally, so it binds to
        # ClassLabel's first parameter - confirm that is the intended one
        # (the sibling branch uses the ``names=`` keyword).
        label_schema = ClassLabel(labels)
    else:
        label_schema = ClassLabel(names=os.listdir(path_to_dir))

    return {
        "label": label_schema,
        "image": Tensor(
            shape=(None, None, None),
            max_shape=max_shape,
            dtype=dtype,
        ),
    }
def test_dataset_filter_3():
    """Filter by class id and check indexes, tensors and iteration.

    Every fifth sample is class 0, the rest class 1, and sample 4 is then
    overwritten with class 2 so it is the only match for the second filter.
    """
    schema = {
        "img": Image((None, None, 3), max_shape=(100, 100, 3)),
        "cl": ClassLabel(names=["cat", "dog", "horse"]),
    }
    ds = Dataset("./data/tests/filtering_3", shape=(100,), schema=schema, mode="w")

    for idx in range(100):
        ds["cl", idx] = 1 if idx % 5 else 0
        ds["img", idx] = idx * np.ones((5, 6, 3))
    ds["cl", 4] = 2

    # Class 0 lives at every multiple of five.
    class_zero_view = ds.filter(lambda sample: sample["cl"].compute() == 0)
    assert class_zero_view.indexes == list(range(0, 100, 5))

    # Class 2 matches only sample 4, whose image is filled with 4s.
    class_two_view = ds.filter(lambda sample: sample["cl"].compute() == 2)
    assert (class_two_view["img"].compute() == 4 * np.ones((1, 5, 6, 3))).all()
    for sample in class_two_view:
        assert (sample["img"].compute() == 4 * np.ones((5, 6, 3))).all()
        assert sample["cl"].compute() == 2
def check_class_label(value: Union[np.ndarray, list], label: ClassLabel):
    """Check if value can be assigned to predefined ClassLabel.

    Args:
        value: a single class id or name, or an iterable of ids and names.
            Names inside a mutable iterable are replaced in place by ids.
        label: object providing ``str2int(name)``, ``names`` and
            ``num_classes``.

    Returns:
        The single id when exactly one label was given; otherwise the
        iterable with every name converted to its id. An empty iterable is
        returned unchanged.

    Raises:
        ClassLabelValueError: if a name is unknown to ``label`` or an id is
            outside ``0 .. label.num_classes - 1``.
    """
    if not isinstance(value, Iterable) or isinstance(value, str):
        # Scalars and single names are validated as a one-element list.
        assign_class_labels = [value]
    else:
        assign_class_labels = value

    for i, assign_class_label in enumerate(assign_class_labels):
        if isinstance(assign_class_label, str):
            try:
                assign_class_labels[i] = label.str2int(assign_class_label)
            except KeyError as err:
                raise ClassLabelValueError(label.names, assign_class_label) from err

    # Check every id on its own so the error names the offending value, and
    # so an empty input has nothing to validate instead of failing in min().
    highest_id = label.num_classes - 1
    for class_id in assign_class_labels:
        if class_id < 0 or class_id > highest_id:
            # Valid ids are 0 .. num_classes - 1, i.e. range(num_classes).
            raise ClassLabelValueError(range(label.num_classes), class_id)

    if len(assign_class_labels) == 1:
        return assign_class_labels[0]
    return assign_class_labels
def main():
    """Walk through the basic dataset API: create, write, resize, flush, reopen."""
    path = "./data/examples/new_api_intro2"
    schema = {
        "image": Image(shape=(None, None), max_shape=(28, 28)),
        "label": ClassLabel(num_classes=10),
    }

    # Create a 10-sample dataset and fill every sample.
    dataset = Dataset(path, shape=(10,), mode="w", schema=schema)
    print(len(dataset))
    for index in range(len(dataset)):
        with Timer("writing single element"):
            dataset["image", index] = np.ones((28, 28), dtype="uint8")
            dataset["label", index] = 3

    # Grow the dataset and read a slice from the newly added range.
    dataset.resize_shape(200)
    print(dataset.shape)
    print(dataset["label", 100:110].numpy())

    with Timer("Committing"):
        dataset.flush()

    # Reopen from the same path and read back.
    reopened = Dataset(path)
    print(reopened.schema)
    print(reopened["image", 0].compute())
import numpy as np
import hub
from hub.schema import Image, ClassLabel
from hub.utils import Timer

# Schema used by the benchmark: 28x28 images stored in chunks of 1000
# samples, plus a 10-class label.
schema = {
    "image": Image((28, 28), chunks=(1000, 28, 28)),
    "label": ClassLabel(num_classes=10),
}


def main():
    """Time the creation of a dataset and the batched writing of its images.

    One random ``uint8`` batch of ``step`` images is generated once and
    written to every consecutive ``step``-sized slice of the "image" tensor.
    """
    sample_count = 70000
    step = 10
    with Timer("Time"):
        ds = hub.Dataset(
            "./data/examples/mnist_upload_speed_benchmark",
            mode="w",
            schema=schema,
            shape=(sample_count, ),
            cache=2**26,
        )
        # The same batch is reused for every write.
        arr = (np.random.rand(step, 28, 28) * 100).astype("uint8")
        for i in range(0, sample_count, step):
            # with Timer(f"Sample {i}"):
            ds["image", i:i + step] = arr
""" import glob import os import numpy as np import PIL.Image import hub from hub.schema import ClassLabel, Image # Create a new dataset schema = { "image": Image(shape=(None, None, None), max_shape=(3000, 3000, 3), dtype="uint8"), "label": ClassLabel(num_classes=2), } tag = "/tmp/chest_xray/train" len_ds = 5216 ds = hub.Dataset(tag, mode="w+", shape=(len_ds, ), schema=schema) # Transform function @hub.transform(schema=schema, scheduler="threaded", workers=8) def fill_ds(filename): if os.path.basename(os.path.dirname(filename)) == "NORMAL": label = 0 else: label = 1 image = np.array(PIL.Image.open(filename)) if len(image.shape) == 2:
If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. """ from hub import Dataset from hub.api.datasetview import TensorView from hub.exceptions import NoneValueException from hub.schema import Tensor, ClassLabel import numpy as np import pytest my_schema = { "image": Tensor((None, None, None, None), "uint8", max_shape=(10, 1920, 1080, 4)), "label": ClassLabel(num_classes=3), } my_schema2 = { "image": Tensor((None, None, None, None), "uint8", max_shape=(10, 1920, 1080, 4)), "label": ClassLabel(names=["red", "green", "blue"]), } ds = Dataset("./data/test/dataset", shape=(100, ), mode="w", schema=my_schema) ds2 = Dataset("./data/test/dataset2", shape=(5, ), mode="w", schema=my_schema2) ds["label", 0] = 1 ds["label", 1] = 2 ds["label", 2] = 0 ds2["label", 0] = 1
def _from_supervisely(project, scheduler: str = "single", workers: int = 1):
    """Convert a Supervisely project into hub samples via ``hub.transform``.

    The project is scanned once to derive the schema (blob shape, maximum
    number of boxes/polygons, label names, presence of masks) and a
    transform built on that schema is then applied to every item.

    Args:
        project: either a path to a project directory (its ``meta.json`` is
            read and the project is opened in read mode) or an already
            instantiated project object exposing ``meta`` and ``datasets``.
        scheduler: forwarded to ``hub.transform``.
        workers: forwarded to ``hub.transform``.

    Returns:
        The result of calling the generated transform on the list of all
        project items.

    Raises:
        ModuleNotInstalledException: if the Supervisely / skvideo imports
            are not available.
    """
    import os  # local import: only needed to build the meta.json path

    try:
        import supervisely_lib as sly
        from supervisely_lib.project import project as sly_image_project
        from supervisely_lib.project import video_project as sly_video_project
        from skvideo.io import FFmpegReader, vread
    except ModuleNotFoundError as err:
        raise ModuleNotInstalledException("supervisely") from err

    if isinstance(project, str):
        # os.path.join works whether or not the path ends with a separator.
        with open(os.path.join(project, "meta.json")) as meta_file:
            project_meta_dict = json.load(meta_file)
        instantiated = False
    else:
        project_meta_dict = project.meta.to_json()
        instantiated = True
    project_type = project_meta_dict["projectType"]
    mode = sly.OpenMode.READ

    def infer_image(paths):
        """Return (size, bboxes, masks, bbox classes, mask classes) for one image."""
        bboxes, masks = [], []
        classes_bb, classes_mask = [], []
        item_path, item_ann_path = paths
        # ``project`` is the enclosing variable; it is rebound to the opened
        # project object before this helper is first called.
        ann = sly.Annotation.load_json_file(item_ann_path, project.meta)
        ann_dict = ann.to_json()
        sizes = (ann_dict["size"]["height"], ann_dict["size"]["width"])
        for obj in ann_dict["objects"]:
            if obj["geometryType"] == "rectangle":
                # Flatten the exterior corner points into one list.
                bboxes.append([
                    coord for point in obj["points"]["exterior"]
                    for coord in point
                ])
                classes_bb.append(obj["classTitle"])
            elif obj["geometryType"] == "polygon":
                # Rasterize the polygon into a (height, width) 0/1 mask.
                img = PIL.Image.new("L", (sizes[1], sizes[0]), 0)
                PIL.ImageDraw.Draw(img).polygon(
                    [tuple(point) for point in obj["points"]["exterior"]],
                    outline=1,
                    fill=1,
                )
                masks.append(np.array(img))
                classes_mask.append(obj["classTitle"])
        return sizes, bboxes, masks, classes_bb, classes_mask

    def infer_video(paths):
        """Return a 1-tuple holding the shape reported by the video reader."""
        item_path, item_ann_path = paths
        vreader = FFmpegReader(item_path)
        return (vreader.getShape(), )

    def infer_project(project, project_type, read_mode):
        """Open the project if needed and pick the blob type and item parser."""
        if project_type == "images":
            if not instantiated:
                project = sly_image_project.Project(project, read_mode)
            max_shape = (0, 0)
            return (
                project,
                Image,
                infer_image,
                max_shape,
            )
        elif project_type == "videos":
            if not instantiated:
                project = sly_video_project.VideoProject(project, read_mode)
            max_shape = (0, 0, 0, 0)
            return (
                project,
                Video,
                infer_video,
                max_shape,
            )

    project, main_blob, infer_ds, max_shape = infer_project(
        project, project_type, mode)

    # ---- Pass 1: scan every item to derive the schema. ----
    label_names = []
    max_num_bboxes = 0
    max_num_polys = 0
    masks = False
    datasets = project.datasets.items()
    uniform = True
    first_shape = None
    for scanned_ds in datasets:
        for item in scanned_ds:
            inf = infer_ds(scanned_ds.get_item_paths(item))
            if len(inf) > 1:
                # Image items carry annotation details; video items do not.
                if inf[3]:
                    label_names.extend(inf[3])
                    if len(inf[3]) > max_num_bboxes:
                        max_num_bboxes = len(inf[3])
                if inf[4]:
                    label_names.extend(inf[4])
                    if len(inf[4]) > max_num_polys:
                        max_num_polys = len(inf[4])
                if inf[2]:
                    masks = True
            shape = inf[0]
            max_shape = np.maximum(shape, max_shape)
            # Compare each shape with the first one seen. Comparing with the
            # running maximum would miss an item larger than all earlier
            # ones, because the maximum has already absorbed it.
            if first_shape is None:
                first_shape = tuple(shape)
            elif tuple(shape) != first_shape:
                uniform = False
    # Keep ``max_shape`` an array even when no item was scanned.
    max_shape = np.asarray(max_shape)
    label_names = list(np.unique(label_names))

    items = chain(*datasets)
    idatasets = iter(datasets)
    # Cursor shared with ``transformation``: the dataset currently being
    # consumed and how many of its items have been processed.
    current_ds, consumed = next(idatasets), 0

    key = "shape" if uniform else "max_shape"
    if project_type == "images":
        read = sly.imaging.image.read
        blob_shape = {key: (*max_shape.tolist(), 3)}
    elif project_type == "videos":
        read = vread
        blob_shape = {key: max_shape.tolist()}
        if key == "max_shape":
            blob_shape["shape"] = (None, None, None, 3)

    schema = {
        project_type: main_blob(**blob_shape),
    }
    if max_num_bboxes:
        schema["bbox"] = BBox(shape=(None, 4), max_shape=(max_num_bboxes, 4))
    if label_names:
        schema["label"] = ClassLabel(
            shape=(None, ),
            max_shape=(max(max_num_bboxes, max_num_polys), ),
            names=label_names,
        )
    if masks:
        schema["mask"] = Mask(shape=(None, None, None),
                              max_shape=(*max_shape.tolist(), 1))

    # ---- Pass 2: the per-item transform. ----
    @hub.transform(schema=schema, scheduler=scheduler, workers=workers)
    def transformation(item):
        # NOTE(review): the cursor assumes items arrive in the order of
        # ``chain(*datasets)``; presumably not safe with workers > 1 - confirm.
        nonlocal consumed, current_ds
        sample = {}
        if consumed >= len(current_ds):
            current_ds, consumed = next(idatasets), 0
        item_path, item_ann_path = current_ds.get_item_paths(item)
        consumed += 1

        inferred = infer_ds((item_path, item_ann_path))
        if len(inferred) > 1:
            _, bboxes, item_masks, classes_bbox, classes_mask = inferred
        else:
            # Video items yield only a shape; there are no annotations.
            bboxes, item_masks, classes_bbox, classes_mask = [], [], [], []

        sample[project_type] = read(item_path)
        if bboxes:
            sample["bbox"] = np.array(bboxes)
            sample["label"] = [label_names.index(name) for name in classes_bbox]
        if item_masks:
            # Only the first mask is stored, and the mask class labels
            # replace any bbox labels set above.
            sample["mask"] = np.expand_dims(item_masks[0], -1)
            sample["label"] = [label_names.index(name) for name in classes_mask]
        return sample

    return transformation(list(items))