Example No. 1
    def test_upload_frame_without_sensor(self, accesskey, url, tmp_path):
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name, is_fusion=True)
        dataset_client.create_draft("draft-1")
        segment_client = dataset_client.get_or_create_segment("segment1")

        path = tmp_path / "sub"
        path.mkdir()

        frame = Frame()
        local_path = path / "hello0.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        frame[LIDAR_DATA["name"]] = data

        # If not uploading sensor, uploading frame is not allowed
        with pytest.raises(ResponseError):
            segment_client.upload_frame(frame, timestamp=0)

        gas_client.delete_dataset(dataset_name)
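
A minimal sketch of the accepted order, assuming Lidar from tensorbay.sensor and the same LIDAR_DATA dict used above: upload the sensor first, and the frame upload then succeeds.

    segment_client.upload_sensor(Lidar(LIDAR_DATA["name"]))  # register the sensor before any frame
    segment_client.upload_frame(frame, timestamp=0)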
Example No. 2
    def test_upload_data_without_label(self, accesskey, url, tmp_path):
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name)
        dataset_client.create_draft("draft-1")
        segment_client = dataset_client.get_or_create_segment("segment1")
        path = tmp_path / "sub"
        path.mkdir()

        for i in range(5):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            segment_client.upload_data(Data(local_path=str(local_path)))

        data = segment_client.list_data()
        assert data[0].path == "hello0.txt"
        assert data[0].open().read() == b"CONTENT"
        assert not data[0].label
        # TODO: match the input and output label

        gas_client.delete_dataset(dataset_name)
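
A hedged sketch of uploading labeled data, addressing the TODO above; it assumes the dataset's catalog declares a matching classification subcatalog, and "example-category" is illustrative.

    data = Data(local_path=str(local_path))
    data.label.classification = Classification("example-category")
    segment_client.upload_data(data)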
Example No. 3
def _get_data_10k(
    image_path: str,
    original_mask_paths: Dict[str, str],
    label_content: Dict[str, Any],
    single_channel_mask_paths: Dict[str, str],
) -> Data:
    data = Data(image_path)
    polygon: List[LabeledPolygon] = []
    for label_info in label_content["labels"]:
        if "poly2d" in label_info:
            _add_poly2d_label_10k(label_info, polygon)
    label = data.label
    label.polygon = polygon
    stem = os.path.splitext(os.path.basename(image_path))[0]
    label.semantic_mask = SemanticMask(
        os.path.join(original_mask_paths["sem"], f"{stem}.png"))
    label.instance_mask = _get_instance_mask(stem, original_mask_paths["ins"],
                                             single_channel_mask_paths["ins"])
    label.panoptic_mask = _get_panoptic_mask(stem, original_mask_paths["pan"],
                                             single_channel_mask_paths["pan"])
    return data
Example No. 4
def _get_data_part2(root_path: str, animals: Iterable[str]) -> Iterator[Data]:
    try:
        import xmltodict  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(module_name=error.name) from error

    for animal in animals:
        for image_path in glob(os.path.join(root_path, "animalpose_image_part2", animal, "*.jpeg")):
            data = Data(image_path, target_remote_path=f"{animal}/{os.path.basename(image_path)}")

            annotation_path = os.path.join(
                root_path,
                "animalpose_anno2",
                animal,
                f"{os.path.splitext(os.path.basename(image_path))[0]}.xml",
            )

            with open(annotation_path, encoding="utf-8") as fp:
                labels = xmltodict.parse(fp.read())

            box2d = labels["annotation"]["visible_bounds"]
            data.label.box2d = [
                LabeledBox2D.from_xywh(
                    x=float(box2d["@xmin"]),
                    y=float(box2d["@xmax"]),  # xmax means ymin in the annotation
                    width=float(box2d["@width"]),
                    height=float(box2d["@height"]),
                    category=animal,
                )
            ]

            keypoints2d = LabeledKeypoints2D(category=animal)
            for keypoint in labels["annotation"]["keypoints"]["keypoint"]:
                keypoints2d.append(
                    Keypoint2D(
                        float(keypoint["@x"]), float(keypoint["@y"]), int(keypoint["@visible"])
                    )
                )
            data.label.keypoints2d = [keypoints2d]
            yield data
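
A hypothetical call matching the globs above; "/data/animalpose" and "cat" are illustrative names.

    for data in _get_data_part2("/data/animalpose", ["cat"]):
        print(data.target_remote_path, len(data.label.keypoints2d[0]))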
Example No. 5
def KylbergTexture(path: str) -> Dataset:
    """`Kylberg Texture <http://www.cb.uu.se/~gustaf/texture/>`_ dataset.

    The file structure should be like::

        <path>
            originalPNG/
                <imagename>.png
                ...
            withoutRotateAll/
                <imagename>.png
                ...
            RotateAll/
                <imagename>.png
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))

    for segment_name, label_getter in _LABEL_GETTERS.items():
        image_paths = glob(os.path.join(root_path, segment_name, "*.png"))

        segment = dataset.create_segment(segment_name)

        for image_path in image_paths:
            data = Data(image_path)
            stem = os.path.splitext(os.path.basename(image_path))[0]
            data.label.classification = label_getter(stem)
            segment.append(data)

    return dataset
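
A minimal usage sketch of the loader; "~/Kylberg" is a hypothetical extraction path (the function expands it with os.path.expanduser).

    dataset = KylbergTexture("~/Kylberg")
    for segment in dataset:
        for data in segment:
            print(segment.name, data.label.classification.category)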
Example No. 6
    def test_cache_fusion_dataset(self, accesskey, url, tmp_path):
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name,
                                                   is_fusion=True)
        dataset_client.create_draft("draft-1")

        segment = FusionSegment("Segment1")
        segment.sensors = Sensors.loads(_SENSORS_DATA)

        paths = {"Lidar1": tmp_path / "lidar", "Camera1": tmp_path / "camera"}
        for path in paths.values():
            path.mkdir()

        for i in range(_SEGMENT_LENGTH):
            frame = Frame()
            for sensor_data in _SENSORS_DATA:
                sensor_name = sensor_data["name"]
                data_path = paths[sensor_name] / f"{sensor_name}{i}.txt"
                data_path.write_text("CONTENT")
                frame[sensor_name] = Data(local_path=str(data_path))
            segment.append(frame)

        dataset_client.upload_segment(segment)
        dataset_client.commit("commit-1")
        cache_path = tmp_path / "cache_test"
        dataset_client.enable_cache(str(cache_path))
        segment1 = FusionSegment(name="Segment1", client=dataset_client)
        for frame in segment1:
            for data in frame.values():
                data.open()

        segment_cache_path = (cache_path / dataset_client.dataset_id /
                              dataset_client.status.commit_id / "Segment1")
        correct_files = set(
            segment_cache_path / f'{sensor_data["name"]}{i}.txt'
            for i in range(_SEGMENT_LENGTH) for sensor_data in _SENSORS_DATA)
        assert set(segment_cache_path.glob("*.txt")) == correct_files

        gas_client.delete_dataset(dataset_name)
Example No. 7
def CoinImage(path: str) -> Dataset:
    """`Coin Image <https://cvl.tuwien.ac.at/research/cvl-databases/coin-image-dataset/>`_ dataset.

    The file structure should be like::

        <path>
            classes.csv
            <imagename>.png
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    csv_path = os.path.join(root_path, "classes.csv")
    with open(csv_path, "r", encoding="utf-8") as fp:
        reader = csv.reader(fp, delimiter=";")
        mapping: Dict[str, str] = dict(
            row for row in reader)  # type: ignore[arg-type, misc]

    image_paths = glob(os.path.join(root_path, "*.png"))

    for image_path in image_paths:
        data = Data(image_path)
        filename = os.path.basename(image_path)
        class_id = filename[5:].split("_", 1)[0]
        data.label.classification = Classification(category=mapping[class_id])
        segment.append(data)

    return dataset
Example No. 8
def BioIDFace(path: str) -> Dataset:
    """`BioID Face <https://www.bioid.com/facedb/>`_ dataset.

    The folder structure should be like::

                <path>
                    BioID-FaceDatabase-V1.2/
                        BioID_0000.eye
                        BioID_0000.pgm
                        ...
                    points_20/
                        bioid_0000.pts

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    # Sort both globs so the zipped image/keypoint pairs line up file-by-file:
    # glob() returns paths in arbitrary order.
    image_paths = sorted(
        glob(os.path.join(root_path, "BioID-FaceDatabase-V1.2", "*.pgm")))
    face_keypoints_paths = sorted(
        glob(os.path.join(root_path, "points_20", "*.pts")))

    for image_path, face_keypoints_path in zip(image_paths,
                                               face_keypoints_paths):
        data = Data(image_path)
        data.label.keypoints2d = _get_label(
            f"{os.path.splitext(image_path)[0]}.eye", face_keypoints_path)

        segment.append(data)

    return dataset
Example No. 9
def ImageEmotionAbstract(path: str) -> Dataset:
    """`Image Emotion-abstract <https://www.imageemotion.org/>`_ dataset.

    The file structure should be like::

        <path>
            ABSTRACT_groundTruth.csv
            abstract_xxxx.jpg
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME_ABSTRACT)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog_abstract.json"))
    segment = dataset.create_segment()

    csv_path = os.path.join(root_path, "ABSTRACT_groundTruth.csv")
    with open(csv_path, encoding="utf-8") as fp:
        reader = csv.DictReader(fp)
        reader.fieldnames = [
            field.strip("'") for field in reader.fieldnames  # type:ignore[union-attr]
        ]

        for row in reader:
            image_path = os.path.join(root_path, row.pop("").strip("'"))

            data = Data(image_path)
            values = {key: int(value) for key, value in row.items()}

            data.label.classification = Classification(attributes=values)
            segment.append(data)

    return dataset
Example No. 10
def _get_mots_data(
    image_path: str,
    original_mask_subdir: str,
    semantic_subdir: str,
    instance_subdir: str,
    stem: str,
    *,
    label_content: Dict[str, Any],
) -> Data:
    data = Data(image_path)
    labeled_multipolygons = []
    for label_info in label_content.get("labels", ()):
        if "poly2d" not in label_info:
            continue
        labeled_multipolygon = LabeledMultiPolygon(
            polygons=(poly2d_info["vertices"]
                      for poly2d_info in label_info["poly2d"]),
            category=label_info["category"],
            attributes=label_info["attributes"],
            instance=str(label_info["id"]),
        )
        labeled_multipolygons.append(labeled_multipolygon)

    semantic_path = os.path.join(semantic_subdir, f"{stem}.png")
    instance_path = os.path.join(instance_subdir, f"{stem}.png")
    mask_info = _save_and_get_mask_info(
        os.path.join(original_mask_subdir, f"{stem}.png"),
        semantic_path,
        instance_path,
        os.path.join(instance_subdir, f"{stem}.json"),
    )
    ins_mask = InstanceMask(instance_path)
    ins_mask.all_attributes = mask_info["all_attributes"]

    label = data.label
    label.multi_polygon = labeled_multipolygons
    label.semantic_mask = SemanticMask(semantic_path)
    label.instance_mask = ins_mask
    return data
Example No. 11
def DogsVsCats(path: str) -> Dataset:
    """`Dogs vs Cats <https://www.kaggle.com/c/dogs-vs-cats>`_ dataset.

    The file structure should be like::

        <path>
            train/
                cat.0.jpg
                ...
                dog.0.jpg
                ...
            test/
                1000.jpg
                1001.jpg
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    for segment_name, is_labeled in _SEGMENTS.items():
        segment = dataset.create_segment(segment_name)
        image_paths = glob(os.path.join(root_path, segment_name, "*.jpg"))
        for image_path in image_paths:
            data = Data(image_path)
            if is_labeled:
                data.label.classification = Classification(
                    os.path.basename(image_path)[:3])
            segment.append(data)

    return dataset
Example No. 12
def _load_positive_segment(segment_name: str, segment_path: str) -> Segment:
    if segment_name.startswith("vid"):
        # Pad zero for segment name to change "vid0" to "vid00"
        segment_name = f"{segment_name[:3]}{int(segment_name[3:]):02}"
    segment = Segment(segment_name)
    annotation_file = glob(
        os.path.join(segment_path, "frameAnnotations-*", "frameAnnotations.csv")
    )[0]
    image_folder = os.path.dirname(annotation_file)
    pre_filename = ""
    with open(annotation_file, encoding="utf-8") as fp:
        for annotation in csv.DictReader(fp, delimiter=";"):
            filename = annotation["Filename"]

            if filename != pre_filename:
                data = Data(os.path.join(image_folder, filename))
                data.label.box2d = []
                segment.append(data)
                pre_filename = filename

            occluded, on_another_road = annotation["Occluded,On another road"].split(",", 1)
            data.label.box2d.append(
                LabeledBox2D(
                    int(annotation["Upper left corner X"]),
                    int(annotation["Upper left corner Y"]),
                    int(annotation["Lower right corner X"]),
                    int(annotation["Lower right corner Y"]),
                    category=annotation["Annotation tag"],
                    attributes={
                        "Occluded": bool(int(occluded)),
                        "On another road": bool(int(on_another_road)),
                        "Origin file": annotation["Origin file"],
                        "Origin frame number": int(annotation["Origin frame number"]),
                        "Origin track": annotation["Origin track"],
                        "Origin track frame number": int(annotation["Origin track frame number"]),
                    },
                )
            )
    return segment
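
A hypothetical call; the folder name "vid0" is illustrative and shows the zero-padding of short segment names.

    segment = _load_positive_segment("vid0", os.path.join(root_path, "vid0"))
    print(segment.name)  # "vid00"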
Example No. 13
def KenyanFoodOrNonfood(path: str) -> Dataset:
    """`Kenyan Food or Nonfood <https://github.com/monajalal/Kenyan-Food>`_ dataset.

    The file structure should be like::

        <path>
            images/
                food/
                    236171947206673742.jpg
                    ...
                nonfood/
                    168223407.jpg
                    ...
            data.csv
            split.py
            test.txt
            train.txt

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME_FOOD_OR_NONFOOD)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog_food_or_nonfood.json"))

    for segment_name, filename in SEGMENTS_FOOD_OR_NONFOOD.items():
        segment = dataset.create_segment(segment_name)
        with open(os.path.join(root_path, filename), encoding="utf-8") as fp:
            for line in fp:
                # The list file stores paths relative to the root, e.g.
                # "images/food/xxx.jpg", so take the category from the
                # relative path before joining it with the root directory.
                relative_path = line.strip()
                category = relative_path.split("/")[1]
                data = Data(os.path.join(root_path, relative_path))
                data.label.classification = Classification(category)
                segment.append(data)
    return dataset
Example No. 14
def _get_data(path: str, annotations: Any,
              flag: bool) -> Iterator[Tuple[Data, str]]:
    filepath_to_data: Dict[str, Data] = {}

    for annotation in annotations:
        filepath = annotation["filepath"][0]

        keypoints = LabeledKeypoints2D(
            annotation["coords"].T[_VALID_KEYPOINT_INDICES],
            attributes={
                "poselet_hit_idx": annotation["poselet_hit_idx"].T.tolist()
            },
        )
        box2d = LabeledBox2D(*annotation["torsobox"][0].tolist())

        if filepath not in filepath_to_data:
            data = Data(os.path.join(path, "images", filepath))
            data.label.keypoints2d = [keypoints]
            data.label.box2d = [box2d]
            attribute = {"currframe": int(annotation["currframe"][0][0])}

            if flag:
                attribute["isunchecked"] = bool(annotation["isunchecked"])
            data.label.classification = Classification(
                category=annotation["moviename"][0], attributes=attribute)
            filepath_to_data[filepath] = data

            if annotation["istrain"]:
                segment_name = "train"
            elif annotation["istest"]:
                segment_name = "test"
            else:
                segment_name = "bad"
            yield data, segment_name

        else:
            image_data = filepath_to_data[filepath]
            image_data.label.keypoints2d.append(keypoints)
            image_data.label.box2d.append(box2d)
Example No. 15
def TLR(path: str) -> Dataset:
    """`TLR <http://www.lara.prd.fr/benchmarks/trafficlightsrecognition>`_ dataset.

    The file structure should be like::

        <path>
            Lara3D_UrbanSeq1_JPG/
                frame_011149.jpg
                frame_011150.jpg
                frame_<frame_index>.jpg
                ...
            Lara_UrbanSeq1_GroundTruth_cvml.xml

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    file_paths = glob(os.path.join(root_path, "Lara3D_UrbanSeq1_JPG", "*.jpg"))
    labels = _parse_xml(
        os.path.join(root_path, "Lara_UrbanSeq1_GroundTruth_cvml.xml"))
    for file_path in file_paths:
        # the image file name looks like:
        # frame_000001.jpg
        frame_index = int(os.path.basename(file_path)[6:-4])
        data = Data(file_path)
        data.label.box2d = labels[frame_index]
        segment.append(data)
    return dataset
Example No. 16
def get_voc_detection_data(
    stem: str, image_path: str, annotation_path: str, boolean_attributes: List[str]
) -> Data:
    """Get all information of the datum corresponding to voc-like label files.

    Arguments:
        stem: The filename without extension of the data.
        image_path: The path of the image directory.
        annotation_path: The path of the annotation directory.
        boolean_attributes: The list of boolean attributes.

    Returns:
        Loaded :class:`~tensorbay.dataset.data.Data` instance.

    """
    data = Data(os.path.join(image_path, f"{stem}.jpg"))
    box2d = []
    with open(os.path.join(annotation_path, f"{stem}.xml"), encoding="utf-8") as fp:
        labels: Any = xmltodict.parse(fp.read())
    objects = labels["annotation"]["object"]

    if not isinstance(objects, list):
        objects = [objects]
    for obj in objects:
        attributes = {attribute: bool(int(obj[attribute])) for attribute in boolean_attributes}
        attributes["pose"] = obj["pose"]
        bndbox = obj["bndbox"]
        box2d.append(
            LabeledBox2D(
                float(bndbox["xmin"]),
                float(bndbox["ymin"]),
                float(bndbox["xmax"]),
                float(bndbox["ymax"]),
                category=obj["name"],
                attributes=attributes,
            )
        )
    data.label.box2d = box2d
    return data
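
A hedged sketch of calling the VOC-style reader; the directory names are illustrative and the boolean attributes follow the VOC object fields.

    data = get_voc_detection_data(
        "000001",
        os.path.join(root_path, "JPEGImages"),
        os.path.join(root_path, "Annotations"),
        ["truncated", "difficult"],
    )
    for box2d in data.label.box2d:
        print(box2d.category, box2d.attributes)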
Example No. 17
def _load_segment_10k(dataset: Dataset, root_path: str,
                      labels_dir: str) -> None:
    for segment_name in _SEGMENT_NAMES:
        segment = dataset.create_segment(segment_name)
        image_paths = glob(
            os.path.join(root_path, "images", "10k", segment_name, "*.jpg"))

        print(f"Reading data to segment '{segment_name}'...")
        if segment_name == "test":
            for image_path in image_paths:
                segment.append(Data(image_path))
        else:
            single_channel_mask_dirs: Dict[str, str] = {}
            original_mask_dirs: Dict[str, str] = {}
            for seg_type, dir_names in _SEGMENTATIONS_INFO.items():
                original_mask_dirs[seg_type] = os.path.join(
                    labels_dir, *dir_names, segment_name)
                if seg_type != "sem":
                    single_channel_mask_dir = os.path.join(
                        labels_dir,
                        "single_channel_mask",
                        segment_name,
                        dir_names[0],
                    )
                    single_channel_mask_dirs[
                        seg_type] = single_channel_mask_dir
                    os.makedirs(single_channel_mask_dir, exist_ok=True)

            label_contents = _read_label_file_10k(labels_dir, segment_name)
            for image_path in image_paths:
                segment.append(
                    _get_data_10k(
                        image_path,
                        original_mask_dirs,
                        label_contents[os.path.basename(image_path)],
                        single_channel_mask_dirs,
                    ))
            print(f"Finished reading data to segment '{segment_name}'")
Example No. 18
def Elpv(path: str) -> Dataset:
    """`elpv <https://github.com/zae-bayern/elpv-dataset>`_ dataset.

    The file structure should be like::

        <path>
            labels.csv
            images/
                cell0001.png
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    csv_path = os.path.join(root_path, "labels.csv")

    with open(csv_path, encoding="utf-8") as csv_file:
        for row in csv_file:
            image_name, attributes, category = row.strip().split()
            dirname, basename = image_name.split("/")
            image_path = os.path.join(root_path, dirname, basename)
            data = Data(image_path)
            data.label.classification = Classification(
                attributes={"defect probability": float(attributes)},
                category=category)
            segment.append(data)
    return dataset
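
A minimal usage sketch; "/data/elpv-dataset" is a hypothetical path, and each labels.csv row is expected to be whitespace-separated like "images/cell0001.png 0.0 mono".

    dataset = Elpv("/data/elpv-dataset")
    classification = dataset[0][0].label.classification
    print(classification.category, classification.attributes["defect probability"])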
Example No. 19
    def test_upload_segment_with_file(self, accesskey, url, tmp_path):
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name)
        dataset_client.create_draft("draft-1")

        segment = Segment("segment1")
        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            segment.append(data)

        dataset_client.upload_segment(segment)

        segment1 = Segment(name="segment1", client=dataset_client)
        assert len(segment1) == 10
        assert segment1[0].get_url()
        assert segment1[0].path == segment[0].target_remote_path

        gas_client.delete_dataset(dataset_name)
Example No. 20
def _get_data(
    image_path: str,
    original_mask_dir: str,
    annotation_dir: str,
    new_mask_dir: str,
    category_ids: Dict[str, int],
) -> Data:
    stem = os.path.splitext(os.path.basename(image_path))[0]
    new_mask_path = os.path.join(new_mask_dir, f"{stem}.png")
    data = Data(image_path)
    label = data.label
    with open(os.path.join(annotation_dir, f"{stem}.xml"),
              encoding="utf-8") as fp:
        labels: Any = xmltodict.parse(fp.read())["image"]
    label.box2d, label.panoptic_mask = _get_box2d_and_panoptic_mask(
        labels["object"],
        os.path.join(original_mask_dir, f"{stem}_mask.png"),
        new_mask_path,
        category_ids,
    )
    label.classification = _get_classification(
        labels["JSON_Variation_Parameters"]["parameter"])
    return data
Example No. 21
    def test_copy_data_from_commits(self, accesskey, url, tmp_path):
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        dataset = Dataset(name=dataset_name)
        segment = dataset.create_segment("Segment1")
        dataset._catalog = Catalog.loads(CATALOG)
        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

        dataset_client = gas_client.upload_dataset(dataset)
        dataset_client.commit("commit_1")

        for i in range(10, 20):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)
        dataset_client = gas_client.upload_dataset(dataset)
        dataset_client.commit("commit_2")

        dataset_client_1 = gas_client.get_dataset(dataset_name)
        commit_id = dataset_client_1.list_commits()[-1].commit_id
        dataset_client_1.checkout(revision=commit_id)
        dataset_client.create_draft("draft_3")
        segment_client_1 = dataset_client_1.get_segment("Segment1")
        segment_client_2 = dataset_client.get_segment("Segment1")
        segment_client_2.copy_data("hello0.txt",
                                   "goodbye0.txt",
                                   source_client=segment_client_1)

        segment2 = Segment("Segment1", client=dataset_client)
        assert segment2[0].path == "goodbye0.txt"
        assert segment2[0].path != segment[0].target_remote_path
        assert segment2[0].label
        assert len(segment2) == 21

        gas_client.delete_dataset(dataset_name)
Example No. 22
    def test_upload_dataset_only_with_file(self, accesskey, url, tmp_path):
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_random_dataset_name()
        gas_client.create_dataset(dataset_name)

        dataset = Dataset(name=dataset_name)
        segment = dataset.create_segment("Segment1")

        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            segment.append(Data(local_path=str(local_path)))

        dataset_client = gas_client.upload_dataset(dataset)
        assert not dataset_client.get_catalog()
        segment1 = Segment("Segment1", client=dataset_client)
        assert len(segment1) == 10
        assert segment1[0].path == "hello0.txt"
        assert not segment1[0].label

        gas_client.delete_dataset(dataset_name)
Example No. 23
def HardHatWorkers(path: str) -> Dataset:
    """`Hard Hat Workers <https://makeml.app/datasets/hard-hat-workers>`_ dataset.

    The file structure should be like::

        <path>
            annotations/
                hard_hat_workers0.xml
                ...
            images/
                hard_hat_workers0.png
                ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    annotation_dir = os.path.join(root_path, "annotations")

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    segment = dataset.create_segment()
    image_paths = glob(os.path.join(root_path, "images", "*.png"))
    for image_path in image_paths:
        data = Data(image_path)
        data.label.box2d = _load_labels(
            os.path.join(
                annotation_dir,
                f"{os.path.splitext(os.path.basename(image_path))[0]}.xml"))
        segment.append(data)
    return dataset
Example No. 24
def _generate_data(image_path: str, labels: Dict[str, Any]) -> Data:
    data = Data(image_path)
    data.label.box2d = []

    image_id = labels["image_name_id_map"][os.path.basename(image_path)]
    image_annotations_map = labels["image_annotations_map"]

    if image_id not in image_annotations_map:
        return data

    annotations = labels["annotations"]
    poses = labels["poses"]
    categories = labels["categories"]

    for annotation_id in image_annotations_map[image_id]:
        annotation = annotations[annotation_id]
        x_top, y_top, width, height = annotation["bbox"]

        attributes = {
            "occluded": annotation["occluded"],
            "difficult": annotation["difficult"],
            "pose": poses[annotation["pose_id"] - 1]["name"],
            "truncated": annotation["truncated"],
        }

        data.label.box2d.append(
            LabeledBox2D.from_xywh(
                x=x_top,
                y=y_top,
                width=width,
                height=height,
                category=categories[annotation["category_id"]]["name"],
                attributes=attributes,
                instance=str(annotation["tracking_id"]),
            ))

    return data
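
A minimal sketch of the labels layout this helper expects, with keys inferred from the lookups above and illustrative values.

    labels = {
        "image_name_id_map": {"img0.jpg": 0},
        "image_annotations_map": {0: [0]},
        "annotations": [{
            "bbox": [10, 20, 30, 40],
            "occluded": False,
            "difficult": False,
            "pose_id": 1,
            "truncated": False,
            "category_id": 0,
            "tracking_id": 7,
        }],
        "poses": [{"name": "Unspecified"}],
        "categories": [{"name": "person"}],
    }
    data = _generate_data("img0.jpg", labels)  # one LabeledBox2D attached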
Example No. 25
def THCHS30(path: str) -> Dataset:
    """`THCHS-30 <http://166.111.134.19:7777/data/thchs30/README.html>`_ dataset.

    The file structure should be like::

        <path>
            lm_word/
                lexicon.txt
            data/
                A11_0.wav.trn
                ...
            dev/
                A11_101.wav
                ...
            train/
            test/

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    dataset = Dataset(DATASET_NAME)
    dataset.catalog.sentence = _get_subcatalog(
        os.path.join(path, "lm_word", "lexicon.txt"))
    for segment_name in _SEGMENT_NAME_LIST:
        segment = dataset.create_segment(segment_name)
        for filename in glob(os.path.join(path, segment_name, "*.wav")):
            data = Data(filename)
            label_file = os.path.join(path, "data",
                                      os.path.basename(filename) + ".trn")
            data.label.sentence = _get_label(label_file)
            segment.append(data)
    return dataset
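
A minimal usage sketch; "/data/thchs30" is a hypothetical extraction path.

    dataset = THCHS30("/data/thchs30")
    for segment in dataset:
        print(segment.name, len(segment))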
Example No. 26
def PASCALContext(mask_path: str, image_path: str) -> Dataset:
    """`PASCALContext <https://cs.stanford.edu/~roozbeh/pascal-context/>`_ dataset.

    The file structure should be like::

        <mask_path>
            <image_name>.png
            ...

        <image_path>
            <image_name>.jpg
            ...

    Arguments:
        mask_path: The root directory of the dataset mask.
        image_path: The root directory of the dataset image.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_mask_path = os.path.abspath(os.path.expanduser(mask_path))
    root_image_path = os.path.abspath(os.path.expanduser(image_path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(
        os.path.join(os.path.dirname(__file__), "catalog.json"))

    segment = dataset.create_segment("trainval")
    for mask_filename in glob(os.path.join(root_mask_path, "*.png")):
        stem = os.path.splitext(os.path.basename(mask_filename))[0]
        data = Data(os.path.join(root_image_path, f"{stem}.jpg"))
        data.label.semantic_mask = SemanticMask(mask_filename)
        segment.append(data)

    return dataset
Example No. 27
def THUCNews(path: str) -> Dataset:
    """`THUCNews <http://thuctc.thunlp.org/>`_ dataset.

    The folder structure should be like::

        <path>
            <category>/
                0.txt
                1.txt
                2.txt
                3.txt
                ...
            <category>/
            ...

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
    segment = dataset.create_segment()

    for category in dataset.catalog.classification.categories.keys():
        text_paths = glob(os.path.join(root_path, category, "*.txt"))
        for text_path in text_paths:
            data = Data(text_path)
            data.label.classification = Classification(category)

            segment.append(data)

    return dataset
Example No. 28
    def test__upload_segment(self, mocker):
        segment_test = Segment(name="test1")
        for i in range(5):
            segment_test.append(Data(f"data{i}.png"))
        segment_client = SegmentClient(name="test1", data_client=self.dataset_client)
        get_or_create_segment = mocker.patch(
            f"{dataset.__name__}.DatasetClient.get_or_create_segment", return_value=segment_client
        )
        list_data_paths = mocker.patch(
            f"{segment.__name__}.SegmentClient.list_data_paths",
            return_value=["data1.png", "data2.png"],
        )
        multithread_upload = mocker.patch(f"{dataset.__name__}.multithread_upload")

        with Tqdm(5, disable=False) as pbar:
            self.dataset_client._upload_segment(segment_test, skip_uploaded_files=True, pbar=pbar)
            get_or_create_segment.assert_called_once_with(segment_test.name)
            list_data_paths.assert_called_once_with()
            args, keywords = multithread_upload.call_args
            assert args[0] == segment_client._upload_or_import_data
            assert [item.path for item in args[1]] == ["data0.png", "data3.png", "data4.png"]
            assert keywords["callback"] == segment_client._synchronize_upload_info
            assert keywords["jobs"] == 1
            assert keywords["pbar"] == pbar
            multithread_upload.assert_called_once()
        with Tqdm(5, disable=False) as pbar:
            self.dataset_client._upload_segment(segment_test, skip_uploaded_files=False, pbar=pbar)
            get_or_create_segment.assert_called_with(segment_test.name)
            list_data_paths.assert_called_with()
            args, keywords = multithread_upload.call_args
            assert args[0] == segment_client._upload_or_import_data
            assert [item.path for item in args[1]] == [f"data{i}.png" for i in range(5)]
            assert keywords["callback"] == segment_client._synchronize_upload_info
            assert keywords["jobs"] == 1
            assert keywords["pbar"] == pbar
            multithread_upload.assert_called()
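
The test relies on _upload_segment diffing local data against the paths already on the server when skip_uploaded_files=True; a minimal sketch of that filtering, assuming remote and local names match exactly.

    remote_paths = set(segment_client.list_data_paths())
    to_upload = [data for data in segment_test
                 if data.target_remote_path not in remote_paths]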
Example No. 29
def init_dataset_client(accesskey, url, tmp_path_factory):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name)

    dataset_client.create_draft("draft-1")
    dataset_client.commit("commit-1")

    dataset_client.create_branch("dev")
    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path_factory.mktemp("sub")
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text(f"CONTENT_{i}")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL_2)
        segment.append(data)
    dataset_client = gas_client.upload_dataset(dataset, branch_name="dev")
    dataset_client.commit("commit-2")

    dataset_client.checkout(DEFAULT_BRANCH)
    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path_factory.mktemp("sub")
    for i in range(4):
        local_path = path / f"hello{i}.txt"
        local_path.write_text(f"CONTENT_{i}")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL_1)
        segment.append(data)
    dataset_client = gas_client.upload_dataset(dataset,
                                               branch_name=DEFAULT_BRANCH)
    dataset_client.commit("commit-3")
    yield dataset_client

    gas_client.delete_dataset(dataset_name)
Example No. 30
    def test_move_segment_override(self, accesskey, url, tmp_path):
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        dataset = Dataset(name=dataset_name)
        segment1 = dataset.create_segment("Segment1")
        dataset._catalog = Catalog.loads(CATALOG)
        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT_1")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment1.append(data)

        segment2 = dataset.create_segment("Segment2")
        for i in range(10, 20):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT_2")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment2.append(data)

        dataset_client = gas_client.upload_dataset(dataset)
        dataset_client.move_segment("Segment1",
                                    "Segment2",
                                    strategy="override")

        with pytest.raises(ResourceNotExistError):
            dataset_client.get_segment("Segment1")

        segment_moved = Segment("Segment2", client=dataset_client)
        assert segment_moved[0].path == "hello0.txt"
        assert segment_moved[0].path == segment1[0].target_remote_path
        assert segment_moved[0].open().read() == b"CONTENT_1"
        assert segment_moved[0].label

        gas_client.delete_dataset(dataset_name)