Пример #1
0
    def test_copy_segment_skip(self, accesskey, url, tmp_path):
        """Copy "Segment1" onto an existing "Segment2" with ``strategy="skip"``.

        "Segment1" is filled with hello0..hello9 and "Segment2" with
        hello10..hello19. After the copy, the first remote item of "Segment2"
        is expected to be "hello10.txt" and to carry a label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails, so a
        # failing run does not leave it behind.
        try:
            dataset = Dataset(name=dataset_name)
            segment1 = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment1.append(data)

            segment2 = dataset.create_segment("Segment2")
            for i in range(10, 20):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment2.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.copy_segment("Segment1", "Segment2", strategy="skip")

            segment_copied = Segment("Segment2", client=dataset_client)
            assert segment_copied[0].path == "hello10.txt"
            assert segment_copied[0].path == segment2[0].target_remote_path
            assert segment_copied[0].label
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #2
0
    def test_overwrite_label(self, accesskey, url, tmp_path):
        """Upload a label for one file, then upload a replacement label.

        After both uploads the segment listing must still expose
        "hello0.txt" with a truthy label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset_client.create_draft("draft-1")
            dataset_client.upload_catalog(Catalog.loads(CATALOG))
            segment_client = dataset_client.get_or_create_segment("segment1")
            path = tmp_path / "sub"
            path.mkdir()
            local_path = path / "hello0.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            segment_client.upload_file(data.path, data.target_remote_path)

            data.label = Label.loads(LABEL)
            segment_client.upload_label(data)

            # Replace labels
            data.label = Label.loads(NEW_LABEL)
            segment_client.upload_label(data)

            remote_data = segment_client.list_data()
            assert remote_data[0].path == "hello0.txt"
            assert remote_data[0].label
            # todo: match the input and output label
        finally:
            gas_client.delete_dataset(dataset_name)
    def test_job_result(self, dataset_client, job):
        """Check the data of "Segment1" in the draft produced by ``job``.

        The expected content depends on the job's "strategy" argument:
        "abort" expects 4 items labelled LABEL_1, "override" expects 10 items
        labelled LABEL_2, and "skip" expects 10 items where the first 4 are
        labelled LABEL_1 and the rest LABEL_2. Any other strategy value only
        gets the branch-name check.
        """
        job.update()
        dataset_client.checkout(draft_number=job.result.number)
        assert dataset_client.status.branch_name == DEFAULT_BRANCH
        segment_data = dataset_client.get_segment("Segment1").list_data()

        strategy = job.arguments.get("strategy")
        if strategy == "abort":
            expected_contents = [LABEL_1] * 4
        elif strategy == "override":
            expected_contents = [LABEL_2] * 10
        elif strategy == "skip":
            expected_contents = [LABEL_1] * 4 + [LABEL_2] * 6
        else:
            return

        assert len(segment_data) == len(expected_contents)
        for index, contents in enumerate(expected_contents):
            item = segment_data[index]
            assert item.path == f"hello{index}.txt"
            assert item.label == Label.loads(contents)
Пример #4
0
    def test_upload_dataset_with_label(self, accesskey, url, tmp_path):
        """Upload a one-file labelled dataset, commit it and read it back.

        Checks the remote catalog and label against the uploaded contents,
        then the label statistics against STATISTICS and the total size
        against TOTALSIZE.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            # When uploading label, upload catalog first.
            dataset._catalog = Catalog.loads(CATALOG_CONTENTS)

            path = tmp_path / "sub"
            path.mkdir()
            local_path = path / "hello.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("upload dataset with label")
            remote_dataset = Dataset(dataset_name, gas_client)
            assert remote_dataset.catalog == Catalog.loads(CATALOG_CONTENTS)
            assert remote_dataset[0][0].label == Label.loads(LABEL)

            statistics1 = dataset_client.get_label_statistics()
            assert statistics1 == Statistics(STATISTICS)

            total_size = dataset_client.get_total_size()
            assert total_size == TOTALSIZE
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #5
0
    def test_copy_segment_abort(self, accesskey, url, tmp_path):
        """Copying onto a segment with the same file names must fail.

        "Segment1" and "Segment2" both contain hello0..hello9; calling
        ``copy_segment`` without an explicit strategy is expected to raise
        ``InternalServerError`` (presumably the default strategy is "abort",
        judging by the test name).
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment1 = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment1.append(data)

            segment2 = dataset.create_segment("Segment2")
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment2.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            with pytest.raises(InternalServerError):
                dataset_client.copy_segment("Segment1", "Segment2")
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #6
0
    def test_upload_label(self, accesskey, url, tmp_path):
        """Upload a label before and after its file exists remotely.

        Uploading the label first is expected to raise
        ``ResourceNotExistError``; once the file is uploaded the label upload
        must succeed and the listing must expose "hello0.txt" with a label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset_client.create_draft("draft-1")
            dataset_client.upload_catalog(Catalog.loads(CATALOG))
            segment_client = dataset_client.get_or_create_segment("segment1")
            path = tmp_path / "sub"
            path.mkdir()

            local_path = path / "hello0.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            # If not uploading file, uploading label is not allowed
            with pytest.raises(ResourceNotExistError):
                segment_client.upload_label(data)

            # Uploading files
            segment_client.upload_file(data.path, data.target_remote_path)

            data.label = Label.loads(LABEL)
            segment_client.upload_label(data)

            remote_data = segment_client.list_data()
            assert remote_data[0].path == "hello0.txt"
            assert remote_data[0].label
            # todo: match the input and output label
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #7
0
    def test_get_data(self, accesskey, url, tmp_path, mask_file):
        """Upload 10 labelled files with masks and read each back by path.

        Every file gets the LABEL contents plus a semantic, an instance and a
        panoptic mask built from ``mask_file``. ``get_data`` must then return
        the same box2d label, and each remote mask must be stored under
        "<file stem>.png" with the expected attributes / category ids.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset_client.create_draft("draft-1")
            dataset_client.upload_catalog(Catalog.loads(CATALOG))
            segment_client = dataset_client.get_or_create_segment("segment1")
            path = tmp_path / "sub"
            path.mkdir()

            # Upload data with label
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text(f"CONTENT{i}")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)

                semantic_mask = SemanticMask(str(mask_file))
                semantic_mask.all_attributes = {0: {"occluded": True}, 1: {"occluded": False}}
                data.label.semantic_mask = semantic_mask

                instance_mask = InstanceMask(str(mask_file))
                instance_mask.all_attributes = {0: {"occluded": True}, 1: {"occluded": False}}
                data.label.instance_mask = instance_mask

                panoptic_mask = PanopticMask(str(mask_file))
                panoptic_mask.all_category_ids = {100: 0, 101: 1}
                data.label.panoptic_mask = panoptic_mask
                segment_client.upload_data(data)

            for i in range(10):
                data = segment_client.get_data(f"hello{i}.txt")
                assert data.path == f"hello{i}.txt"
                assert data.label.box2d == Label.loads(LABEL).box2d

                # Remote masks are expected under the data file's stem.
                stem = os.path.splitext(data.path)[0]
                remote_semantic_mask = data.label.semantic_mask
                semantic_mask = RemoteSemanticMask.from_response_body(SEMANTIC_MASK_LABEL)
                assert remote_semantic_mask.path == f"{stem}.png"
                assert remote_semantic_mask.all_attributes == semantic_mask.all_attributes

                remote_instance_mask = data.label.instance_mask
                instance_mask = RemoteInstanceMask.from_response_body(INSTANCE_MASK_LABEL)
                assert remote_instance_mask.path == f"{stem}.png"
                assert remote_instance_mask.all_attributes == instance_mask.all_attributes

                remote_panoptic_mask = data.label.panoptic_mask
                panoptic_mask = RemotePanopticMask.from_response_body(PANOPTIC_MASK_LABEL)
                assert remote_panoptic_mask.path == f"{stem}.png"
                assert remote_panoptic_mask.all_category_ids == panoptic_mask.all_category_ids
        finally:
            gas_client.delete_dataset(dataset_name)
    def test_eq(self):
        """Labels compare equal only when their classifications match."""
        labels = []
        for color in ("white", "white", "black"):
            label = Label()
            label.classification = Classification("cat", {"color": color})
            labels.append(label)

        white_cat, same_white_cat, black_cat = labels
        assert white_cat == same_white_cat
        assert white_cat != black_cat
Пример #9
0
    def test_copy_fusion_segment(self, accesskey, url, tmp_path):
        """Copy a fusion segment to a new name and check the copy.

        "Segment1" (10 frames on the LIDAR_DATA sensor) is copied to
        "Segment2". An unknown strategy ("push") must raise
        ``InvalidParamsError``. The first frame of the copy must point at
        "hello0.txt" and carry a label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name, is_fusion=True)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset = FusionDataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            segment.sensors.add(Sensor.loads(LIDAR_DATA))
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                frame = Frame()
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                frame[LIDAR_DATA["name"]] = data
                segment.append(frame)

            dataset_client = gas_client.upload_dataset(dataset)
            segment_client = dataset_client.copy_segment("Segment1", "Segment2")
            assert segment_client.name == "Segment2"

            with pytest.raises(InvalidParamsError):
                dataset_client.copy_segment("Segment1", "Segment3", strategy="push")

            segment2 = FusionSegment("Segment2", client=dataset_client)
            assert segment2[0][LIDAR_DATA["name"]].path == "hello0.txt"
            assert (
                segment2[0][LIDAR_DATA["name"]].path
                == segment[0][LIDAR_DATA["name"]].target_remote_path
            )
            assert segment2[0][LIDAR_DATA["name"]].label
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #10
0
def _get_instance_label(instances_annotations: Dict[int, Any], image_id: int,
                        categories: Dict[int, str]) -> Label:
    """Build the instance label of one image from its annotations.

    Arguments:
        instances_annotations: Mapping from image id to that image's
            annotations.
        image_id: Id of the image to build the label for.
        categories: Mapping from category id to category name.

    Returns:
        A label whose ``box2d``, ``multi_polygon`` and ``rle`` lists are
        always set. They stay empty when the image has no annotations.
        Every annotation contributes a box. Annotations with ``iscrowd == 0``
        also contribute a multi-polygon built from their segmentation
        coordinate lists; all others contribute an RLE built from
        ``segmentation["counts"]``.
    """
    label = Label()
    label.box2d = []
    label.multi_polygon = []
    label.rle = []

    annotations = (instances_annotations[image_id]
                   if image_id in instances_annotations else ())
    for annotation in annotations:
        category = categories[annotation["category_id"]]
        box = LabeledBox2D.from_xywh(*annotation["bbox"], category=category)
        label.box2d.append(box)

        if annotation["iscrowd"] != 0:
            counts = annotation["segmentation"]["counts"]
            label.rle.append(LabeledRLE(counts, category=category))
            continue

        # Each segmentation entry is split into chunks of two values.
        polygons = [
            chunked(coordinates, 2)
            for coordinates in annotation["segmentation"]
        ]
        label.multi_polygon.append(
            LabeledMultiPolygon(polygons, category=category))
    return label
    def test_create_and_upload_dataset_with_config(self, accesskey, url, tmp_path):
        """Create a dataset bound to a storage config and upload 5 files.

        The test is skipped when the ``_LOCAL_CONFIG_NAME`` auth storage
        config does not exist. Otherwise the uploaded segment must contain
        hello0..hello4, each with a label, and the catalog must be present.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        try:
            gas_client.get_auth_storage_config(name=_LOCAL_CONFIG_NAME)
        except ResourceNotExistError:
            pytest.skip(f"skip this case because there's no {_LOCAL_CONFIG_NAME} config")

        gas_client.create_dataset(dataset_name, config_name=_LOCAL_CONFIG_NAME)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            # When uploading label, upload catalog first.
            dataset._catalog = Catalog.loads(CATALOG)

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(5):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            assert dataset_client.get_catalog()
            segment1 = Segment("Segment1", client=dataset_client)
            assert len(segment1) == 5
            for i in range(5):
                assert segment1[i].path == f"hello{i}.txt"
                assert segment1[i].label
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #12
0
    def test_move_data_skip(self, accesskey, url, tmp_path):
        """Move a file onto an existing path with ``strategy="skip"``.

        "hello0.txt" is moved onto the already existing "hello1.txt". The
        first item of the segment must then be "hello1.txt" and still hold
        its own content (``b"CONTENT_1"``).
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                # Distinct content per file, so the check below can tell
                # which file survived the move.
                local_path.write_text(f"CONTENT_{i}")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            segment_client = dataset_client.get_segment("Segment1")

            segment_client.move_data("hello0.txt", "hello1.txt", strategy="skip")

            segment_moved = Segment("Segment1", client=dataset_client)
            assert segment_moved[0].path == "hello1.txt"
            assert segment_moved[0].open().read() == b"CONTENT_1"
        finally:
            gas_client.delete_dataset(dataset_name)
    def test_dumps(self):
        """A label built from loaded parts dumps back to the same contents."""
        classification_contents = {
            "category": "cat",
            "attributes": {"gender": "male"},
        }
        box2d_contents = {
            "box2d": {"xmin": 1, "ymin": 1, "xmax": 2, "ymax": 2},
            "category": "dog",
            "attributes": {"gender": "female"},
        }
        expected = {
            "CLASSIFICATION": classification_contents,
            "BOX2D": [box2d_contents],
        }

        label = Label()
        label.classification = Classification.loads(classification_contents)
        label.box2d = [LabeledBox2D.loads(box2d_contents)]
        assert label.dumps() == expected
Пример #14
0
    def test_delete_frame(self, accesskey, url, tmp_path):
        """Delete the first listed frame of a fusion segment.

        Five frames are uploaded on the LIDAR_DATA sensor. After deleting
        the first frame returned by ``list_frames``, its id must no longer
        be listed.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name, is_fusion=True)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset_client.create_draft("draft-1")
            dataset_client.upload_catalog(Catalog.loads(BOX2D_CATALOG))
            segment_client = dataset_client.get_or_create_segment("segment1")
            segment_client.upload_sensor(Sensor.loads(LIDAR_DATA))

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(5):
                frame = Frame()
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                frame[LIDAR_DATA["name"]] = data
                segment_client.upload_frame(frame, timestamp=i)

            frame_1_id = segment_client.list_frames()[0].frame_id
            segment_client.delete_frame(frame_1_id)
            frame_ids = [frame.frame_id for frame in segment_client.list_frames()]
            assert frame_1_id not in frame_ids
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #15
0
    def test_move_segment(self, accesskey, url, tmp_path):
        """Move "Segment1" to "Segment2" and check the moved data.

        The returned segment client must be named "Segment2". An unknown
        strategy ("push") must raise ``InvalidParamsError``. The first item
        of the moved segment must be "hello0.txt" with a label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            segment_client = dataset_client.move_segment("Segment1", "Segment2")
            assert segment_client.name == "Segment2"

            with pytest.raises(InvalidParamsError):
                dataset_client.move_segment("Segment1", "Segment3", strategy="push")

            segment2 = Segment("Segment2", client=dataset_client)
            assert segment2[0].path == "hello0.txt"
            assert segment2[0].path == segment[0].target_remote_path
            assert segment2[0].label
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #16
0
    def test_copy_data_between_datasets(self, accesskey, url, tmp_path):
        """Copy one file from a committed dataset into a second dataset.

        "hello0.txt" is copied from "Segment1" of the first dataset into
        "Segment1" of a draft in the second dataset, using ``source_client``.
        The copy must show up with a label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name_1 = get_dataset_name()
        dataset_name_2 = dataset_name_1 + "_2"
        # Only datasets that were actually created are deleted. They are
        # deleted in creation order, even if a request or assertion fails.
        created_datasets = []
        try:
            gas_client.create_dataset(dataset_name_1)
            created_datasets.append(dataset_name_1)
            dataset_1 = Dataset(name=dataset_name_1)
            segment_1 = dataset_1.create_segment("Segment1")
            dataset_1._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment_1.append(data)
            dataset_client_1 = gas_client.upload_dataset(dataset_1)
            dataset_client_1.commit("upload data")
            segment_client_1 = dataset_client_1.get_segment("Segment1")

            dataset_client_2 = gas_client.create_dataset(dataset_name_2)
            created_datasets.append(dataset_name_2)
            dataset_client_2.create_draft("draft_2")
            dataset_client_2.create_segment("Segment1")
            segment_client_2 = dataset_client_2.get_segment("Segment1")

            segment_client_2.copy_data("hello0.txt", "hello0.txt", source_client=segment_client_1)

            segment2 = Segment("Segment1", client=dataset_client_2)
            assert segment2[0].path == "hello0.txt"
            assert segment2[0].label
        finally:
            for created_name in created_datasets:
                gas_client.delete_dataset(created_name)
Пример #17
0
    def test_copy_data(self, accesskey, url, tmp_path):
        """Copy files inside one segment and check the resulting listing.

        "hello0.txt" is copied to "goodbye0.txt" and "hello1.txt" to
        "hello10.txt". An unknown strategy ("push") must raise
        ``InvalidParamsError``. The asserted positions (index 0 and 3)
        presumably rely on the remote listing being sorted by path.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            segment_client = dataset_client.get_segment("Segment1")
            segment_client.copy_data("hello0.txt", "goodbye0.txt")
            segment_client.copy_data("hello1.txt", "hello10.txt")

            with pytest.raises(InvalidParamsError):
                segment_client.copy_data("hello2.txt", "see_you.txt", strategy="push")

            segment2 = Segment("Segment1", client=dataset_client)
            assert segment2[0].path == "goodbye0.txt"
            assert segment2[3].path == "hello10.txt"
            assert segment2[1].label
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #18
0
    def test_upload_frame_with_label(self, accesskey, url, tmp_path):
        """Upload 5 labelled frames one by one to a fusion segment.

        Each frame holds one file on the LIDAR_DATA sensor and is uploaded
        with its index as timestamp. The listing must contain 5 frames, the
        first of which points at "hello0.txt" and carries a label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name, is_fusion=True)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset_client.create_draft("draft-1")
            dataset_client.upload_catalog(Catalog.loads(CATALOG))
            segment_client = dataset_client.get_or_create_segment("segment1")

            segment_client.upload_sensor(Sensor.loads(LIDAR_DATA))

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(5):
                frame = Frame()
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                frame[LIDAR_DATA["name"]] = data
                segment_client.upload_frame(frame, timestamp=i)

            frames = segment_client.list_frames()
            assert len(frames) == 5
            assert frames[0][LIDAR_DATA["name"]].path == "hello0.txt"
            assert frames[0][LIDAR_DATA["name"]].label
            # todo: match the input and output label
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #19
0
    def test_upload_dataset_after_commit(self, accesskey, url, tmp_path):
        """Upload and commit a dataset, then compare the remote copy.

        The remote dataset must match the local one in its ``is_continuous``
        note, its catalog, its segment length, and in every item's path and
        label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset = Dataset(name=dataset_name)
            dataset._catalog = Catalog.loads(CATALOG)
            dataset.notes.is_continuous = True
            segment = dataset.create_segment("Segment1")

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("test")
            dataset_remote = Dataset(name=dataset_name, gas=gas_client)
            assert dataset_remote.notes.is_continuous == dataset.notes.is_continuous
            assert dataset_remote.catalog == dataset.catalog

            segment_remote = dataset_remote[0]
            assert len(segment_remote) == len(segment)
            for remote_data, local_data in zip(segment_remote, segment):
                assert remote_data.path == local_data.target_remote_path
                assert remote_data.label == local_data.label
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #20
0
    def test_upload_fusion_segment_with_label(self, accesskey, url, tmp_path):
        """Upload a whole fusion segment of 10 labelled frames.

        The remote segment must contain 10 frames. Its first frame must
        point at "hello0.txt", which is the local data's target remote path,
        and carry a label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name, is_fusion=True)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset_client.create_draft("draft-1")
            dataset_client.upload_catalog(Catalog.loads(CATALOG))

            segment = FusionSegment("segment1")
            segment.sensors.add(Sensor.loads(LIDAR_DATA))

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                frame = Frame()
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                frame[LIDAR_DATA["name"]] = data
                segment.append(frame)

            dataset_client.upload_segment(segment)
            segment1 = FusionSegment(name="segment1", client=dataset_client)
            assert len(segment1) == 10
            assert segment1[0][LIDAR_DATA["name"]].path == "hello0.txt"
            assert (
                segment1[0][LIDAR_DATA["name"]].path
                == segment[0][LIDAR_DATA["name"]].target_remote_path
            )
            assert segment1[0][LIDAR_DATA["name"]].label
            # todo: match the input and output label
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #21
0
    def test_delete_data(self, accesskey, url, tmp_path):
        """Delete one file by path, then all remaining files at once.

        After deleting "hello0.txt" it must no longer be listed. After
        passing the full path listing to ``delete_data`` the segment must be
        empty.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset_client.create_draft("draft-1")
            dataset_client.upload_catalog(Catalog.loads(CATALOG))
            segment_client = dataset_client.get_or_create_segment("segment1")

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment_client.upload_data(data)

            segment_client.delete_data("hello0.txt")
            data_paths = segment_client.list_data_paths()
            assert "hello0.txt" not in data_paths

            segment_client.delete_data(segment_client.list_data_paths())
            remaining_data = segment_client.list_data()
            assert len(remaining_data) == 0
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #22
0
    def test_upload_dataset_with_label(self, accesskey, url, tmp_path):
        """Upload a dataset of 10 labelled files and check the remote segment.

        The catalog must be present. The remote "Segment1" must contain 10
        items, the first being "hello0.txt" with a label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            # When uploading label, upload catalog first.
            dataset._catalog = Catalog.loads(CATALOG)

            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            assert dataset_client.get_catalog()
            segment1 = Segment("Segment1", client=dataset_client)
            assert len(segment1) == 10
            assert segment1[0].path == "hello0.txt"
            assert segment1[0].label
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #23
0
    def test_upload_segment_with_label(self, accesskey, url, tmp_path):
        """Upload a single segment of 10 labelled files into a draft.

        The remote "segment1" must contain 10 items. Its first item must be
        "hello0.txt", which is the local data's target remote path, and
        carry a label.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        dataset_client = gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset_client.create_draft("draft-1")
            dataset_client.upload_catalog(Catalog.loads(CATALOG))

            segment = Segment("segment1")
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client.upload_segment(segment)
            segment1 = Segment(name="segment1", client=dataset_client)
            assert len(segment1) == 10
            assert segment1[0].path == "hello0.txt"
            assert segment1[0].path == segment[0].target_remote_path
            assert segment1[0].label
            # todo: match the input and output label
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #24
0
def init_dataset_client(accesskey, url, tmp_path_factory):
    """Yield the client of a committed fusion dataset, then delete the dataset.

    One segment is created per name in SEGMENTS_NAME. Each segment holds a
    single frame with one labelled file per ``(camera_name, label)`` entry
    of LABEL, and a camera sensor of that name. The dataset is uploaded and
    committed as "commit-1" before the client is yielded.

    NOTE(review): this looks like a pytest generator fixture whose decorator
    is not visible here -- confirm.
    """
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name, is_fusion=True)

    # Delete the remote dataset on teardown and also when setup fails.
    try:
        dataset = FusionDataset(name=dataset_name)
        dataset._catalog = Catalog.loads(CATALOG)
        # mktemp() already creates the directory, so no makedirs is needed.
        path = tmp_path_factory.mktemp("sub")
        for segment_name in SEGMENTS_NAME:
            segment = dataset.create_segment(segment_name)
            frame = Frame()
            for camera_name, label in LABEL.items():
                camera = Camera(camera_name)
                translation = Vector3D(1, 2, 3)
                camera.set_extrinsics(translation=translation)
                camera.set_camera_matrix(fx=1.1, fy=1.1, cx=1.1, cy=1.1)
                camera.set_distortion_coefficients(p1=1.2, p2=1.2, k1=1.2, k2=1.2)
                segment.sensors.add(camera)
                local_path = path / f"{segment_name}_{camera_name}.txt"
                local_path.write_text(f"CONTENT_{segment_name}_{camera_name}")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(label)
                frame[camera_name] = data
            segment.append(frame)
        dataset_client = gas_client.upload_dataset(dataset)
        dataset_client.commit("commit-1")

        yield dataset_client
    finally:
        gas_client.delete_dataset(dataset_name)
Пример #25
0
    def test_copy_data_from_commits(self, accesskey, url, tmp_path):
        """Copy a file from an older commit into a new draft.

        Two commits are created: "commit_1" with hello0..hello9 and
        "commit_2" after hello10..hello19 are appended. A second client
        checks out the last entry of ``list_commits()`` and serves as the
        copy source. "hello0.txt" is copied into a new draft as
        "goodbye0.txt", giving 21 items in total.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()
            for i in range(10):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("commit_1")

            for i in range(10, 20):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(LABEL)
                segment.append(data)
            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("commit_2")

            # Source client: pinned to the last commit in the listing.
            dataset_client_1 = gas_client.get_dataset(dataset_name)
            commit_id = dataset_client_1.list_commits()[-1].commit_id
            dataset_client_1.checkout(revision=commit_id)
            # Target client: a fresh draft created from dataset_client.
            dataset_client.create_draft("draft_3")
            segment_client_1 = dataset_client_1.get_segment("Segment1")
            segment_client_2 = dataset_client.get_segment("Segment1")
            segment_client_2.copy_data(
                "hello0.txt", "goodbye0.txt", source_client=segment_client_1
            )

            segment2 = Segment("Segment1", client=dataset_client)
            assert segment2[0].path == "goodbye0.txt"
            assert segment2[0].path != segment[0].target_remote_path
            assert segment2[0].label
            assert len(segment2) == 21
        finally:
            gas_client.delete_dataset(dataset_name)
    def test_cache_dataset(self, accesskey, url, tmp_path):
        """Open every remote file with caching enabled and check the cache.

        ``_SEGMENT_LENGTH`` files are uploaded, each with a semantic and an
        instance mask. After enabling the cache and opening every data file
        and mask, the cache directory
        ``<cache>/<dataset_id>/<commit_id>/Segment1`` must contain exactly
        ``hello{i}.txt``, and its "semantic_mask" and "instance_mask"
        sub-directories exactly ``hello{i}.png``.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Delete the remote dataset even if a request or assertion fails.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            # When uploading label, upload catalog first.
            dataset._catalog = Catalog.loads(_CATALOG)

            path = tmp_path / "sub"
            semantic_path = tmp_path / "semantic_mask"
            instance_path = tmp_path / "instance_mask"
            path.mkdir()
            semantic_path.mkdir()
            instance_path.mkdir()
            for i in range(_SEGMENT_LENGTH):
                local_path = path / f"hello{i}.txt"
                local_path.write_text("CONTENT")
                data = Data(local_path=str(local_path))
                data.label = Label.loads(_LABEL)

                semantic_mask = semantic_path / f"semantic_mask{i}.png"
                semantic_mask.write_text("SEMANTIC_MASK")
                data.label.semantic_mask = SemanticMask(str(semantic_mask))

                instance_mask = instance_path / f"instance_mask{i}.png"
                instance_mask.write_text("INSTANCE_MASK")
                data.label.instance_mask = InstanceMask(str(instance_mask))
                segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("commit-1")
            cache_path = tmp_path / "cache_test"
            dataset_client.enable_cache(str(cache_path))
            segment1 = Segment("Segment1", client=dataset_client)
            # Opening each file is what is expected to fill the cache.
            for data in segment1:
                data.open()
                data.label.semantic_mask.open()
                data.label.instance_mask.open()

            segment_cache_path = (
                cache_path
                / dataset_client.dataset_id
                / dataset_client.status.commit_id
                / "Segment1"
            )
            semantic_mask_cache_path = segment_cache_path / "semantic_mask"
            instance_mask_cache_path = segment_cache_path / "instance_mask"

            for cache_dir, extension in (
                (segment_cache_path, "txt"),
                (semantic_mask_cache_path, "png"),
                (instance_mask_cache_path, "png"),
            ):
                expected_files = {
                    cache_dir / f"hello{i}.{extension}"
                    for i in range(_SEGMENT_LENGTH)
                }
                assert set(cache_dir.glob(f"*.{extension}")) == expected_files
        finally:
            gas_client.delete_dataset(dataset_name)
def init_dataset_client(accesskey, url, tmp_path_factory):
    """Create a remote dataset with two diverging branches and yield its client.

    The history built here is:

    * ``commit-1`` on the default branch, committed without uploading data.
    * ``commit-2`` on branch ``dev`` (created right after ``commit-1``):
      "Segment1" with ``hello0.txt`` .. ``hello9.txt`` labeled ``LABEL_2``.
    * ``commit-3`` on the default branch: "Segment1" with ``hello0.txt`` ..
      ``hello3.txt`` labeled ``LABEL_1``.

    NOTE(review): the ``yield`` / ``tmp_path_factory`` signature suggests this
    is registered as a pytest fixture; the decorator is not visible here.

    Args:
        accesskey: Access key passed to ``GAS``.
        url: Server URL passed to ``GAS``.
        tmp_path_factory: Factory whose ``mktemp`` supplies the directories
            the local data files are written to.

    Yields:
        The dataset client returned by the final upload, after ``commit-3``.
    """
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name)

    def _build_dataset(file_count, label_contents):
        """Return a local dataset whose "Segment1" holds *file_count* labeled files."""
        dataset = Dataset(name=dataset_name)
        segment = dataset.create_segment("Segment1")
        dataset._catalog = Catalog.loads(CATALOG)
        # mktemp() already creates the directory, so no extra makedirs is needed.
        path = tmp_path_factory.mktemp("sub")
        for i in range(file_count):
            local_path = path / f"hello{i}.txt"
            local_path.write_text(f"CONTENT_{i}")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(label_contents)
            segment.append(data)
        return dataset

    # Everything after the remote dataset exists is guarded, so the dataset is
    # deleted even when the setup itself fails half-way through.
    try:
        dataset_client.create_draft("draft-1")
        dataset_client.commit("commit-1")

        dataset_client.create_branch("dev")
        dataset_client = gas_client.upload_dataset(
            _build_dataset(10, LABEL_2), branch_name="dev")
        dataset_client.commit("commit-2")

        dataset_client.checkout(DEFAULT_BRANCH)
        dataset_client = gas_client.upload_dataset(
            _build_dataset(4, LABEL_1), branch_name=DEFAULT_BRANCH)
        dataset_client.commit("commit-3")
        yield dataset_client
    finally:
        gas_client.delete_dataset(dataset_name)
Пример #28
0
    def test_move_segment_override(self, accesskey, url, tmp_path):
        """Move "Segment1" onto the existing "Segment2" with strategy "override".

        After the move, "Segment1" must no longer exist and "Segment2" must
        expose the files that were uploaded to "Segment1" (``hello0.txt``
        with content ``CONTENT_1``), including their labels.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Guard the rest of the test so a failing assertion or API call does
        # not leave the remote dataset behind.
        try:
            dataset = Dataset(name=dataset_name)
            segment1 = dataset.create_segment("Segment1")
            segment2 = dataset.create_segment("Segment2")
            dataset._catalog = Catalog.loads(CATALOG)
            path = tmp_path / "sub"
            path.mkdir()

            # Each segment gets ten files with distinct names and a
            # segment-specific content so the origin of the data can be
            # verified after the move.
            for segment, indexes, content in (
                (segment1, range(10), "CONTENT_1"),
                (segment2, range(10, 20), "CONTENT_2"),
            ):
                for i in indexes:
                    local_path = path / f"hello{i}.txt"
                    local_path.write_text(content)
                    data = Data(local_path=str(local_path))
                    data.label = Label.loads(LABEL)
                    segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.move_segment("Segment1",
                                        "Segment2",
                                        strategy="override")

            with pytest.raises(ResourceNotExistError):
                dataset_client.get_segment("Segment1")

            segment_moved = Segment("Segment2", client=dataset_client)
            assert segment_moved[0].path == "hello0.txt"
            assert segment_moved[0].path == segment1[0].target_remote_path
            assert segment_moved[0].open().read() == b"CONTENT_1"
            assert segment_moved[0].label
        finally:
            gas_client.delete_dataset(dataset_name)
Пример #29
0
 def test_squash_and_merge_skip(self, dataset_client):
     """Squash-merge branch "dev" into the default branch with strategy "skip".

     The resulting draft must sit on the default branch and hold ten files in
     "Segment1": ``hello0.txt`` .. ``hello3.txt`` labeled ``LABEL_1`` and
     the remaining six labeled ``LABEL_2``.
     """
     draft_number = dataset_client.squash_and_merge(
         "draft-6",
         description="description",
         source_branch_name="dev",
         target_branch_name=DEFAULT_BRANCH,
         strategy="skip",
     )
     # The draft is closed in ``finally`` so a failing assertion does not
     # leave it open on the dataset client handed to this test.
     try:
         # NOTE(review): presumably gives the server time to finish the merge
         # before the draft is inspected - confirm; polling would be sturdier.
         time.sleep(5)
         dataset_client.checkout(draft_number=draft_number)
         assert dataset_client.status.branch_name == DEFAULT_BRANCH
         segment_data = dataset_client.get_segment("Segment1").list_data()
         assert len(segment_data) == 10
         for i, data in enumerate(segment_data):
             assert data.path == f"hello{i}.txt"
             expected_label = LABEL_1 if i < 4 else LABEL_2
             assert data.label == Label.loads(expected_label)
     finally:
         dataset_client.checkout(DEFAULT_BRANCH)
         dataset_client.close_draft(draft_number)
Пример #30
0
    def test_upload_label(self, accesskey, url, tmp_path):
        """Upload a labeled dataset, then replace its labels via ``upload_label``.

        Checks that the first commit stores the original label and that, after
        uploading a modified label in a new draft and committing, the dataset
        reports the modified label while the catalog is unchanged.
        """
        gas_client = GAS(access_key=accesskey, url=url)
        dataset_name = get_dataset_name()
        gas_client.create_dataset(dataset_name)
        # Guard the rest of the test so a failing assertion or API call does
        # not leave the remote dataset behind.
        try:
            dataset = Dataset(name=dataset_name)
            segment = dataset.create_segment("Segment1")
            # When uploading label, upload catalog first.
            dataset._catalog = Catalog.loads(CATALOG_CONTENTS)

            path = tmp_path / "sub"
            path.mkdir()
            local_path = path / "hello.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

            dataset_client = gas_client.upload_dataset(dataset)
            dataset_client.commit("upload dataset with label")
            remote_dataset = Dataset(dataset_name, gas_client)
            assert remote_dataset[0][0].label == Label.loads(LABEL)

            dataset_client.create_draft("update label")
            segment_client = dataset_client.get_segment(segment.name)

            # Re-label the local data and push only the labels.
            upload_data = []
            new_label = Label.loads(LABEL)
            new_label.multi_polygon[0].category = "dog"
            for data in segment:
                data.label = new_label
                upload_data.append(data)
            segment_client.upload_label(upload_data)
            dataset_client.commit("update label")

            # Read the dataset back to check the committed state.
            remote_dataset = Dataset(dataset_name, gas_client)
            assert remote_dataset.catalog == Catalog.loads(CATALOG_CONTENTS)
            assert remote_dataset[0][0].label == new_label
        finally:
            gas_client.delete_dataset(dataset_name)