def test_move_data(self, accesskey, url, tmp_path):
    """Move two files to new remote paths and verify the segment listing.

    Fix: removed a stale commented-out ``pytest.raises`` block (dead code).
    """
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)
    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    dataset._catalog = Catalog.loads(CATALOG)
    path = tmp_path / "sub"
    path.mkdir()
    for i in range(10):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(LABEL)
        segment.append(data)
    dataset_client = gas_client.upload_dataset(dataset)
    segment_client = dataset_client.get_segment("Segment1")
    segment_client.move_data("hello0.txt", "goodbye0.txt")
    segment_client.move_data("hello9.txt", "goodbye1.txt")
    segment2 = Segment("Segment1", client=dataset_client)
    # Listing order after the moves: goodbye0, goodbye1, hello1..hello8.
    assert segment2[0].path == "goodbye0.txt"
    assert segment2[1].path == "goodbye1.txt"
    assert segment2[9].path == "hello8.txt"
    assert segment2[0].label
    gas_client.delete_dataset(dataset_name)
def test_upload_dataset_with_label(self, accesskey, url, tmp_path):
    """Upload a labeled dataset, commit, and check catalog/label/statistics round-trip."""
    # NOTE(review): another method with this exact name appears later in this
    # module; pytest collects only one per class — confirm and deduplicate.
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    client.create_dataset(name)
    dataset = Dataset(name=name)
    segment = dataset.create_segment("Segment1")
    # The catalog must be in place before labels are accepted.
    dataset._catalog = Catalog.loads(CATALOG_CONTENTS)
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()
    file_path = sub_dir / "hello.txt"
    file_path.write_text("CONTENT")
    data = Data(local_path=str(file_path))
    data.label = Label.loads(LABEL)
    segment.append(data)
    ds_client = client.upload_dataset(dataset)
    ds_client.commit("upload dataset with label")
    remote_dataset = Dataset(name, client)
    assert remote_dataset.catalog == Catalog.loads(CATALOG_CONTENTS)
    assert remote_dataset[0][0].label == Label.loads(LABEL)
    assert ds_client.get_label_statistics() == Statistics(STATISTICS)
    assert ds_client.get_total_size() == TOTALSIZE
    client.delete_dataset(name)
def test_move_fusion_segment(self, accesskey, url, tmp_path):
    """Rename a fusion segment and verify data, labels, and strategy validation."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    client.create_dataset(name, is_fusion=True)
    dataset = FusionDataset(name=name)
    segment = dataset.create_segment("Segment1")
    segment.sensors.add(Sensor.loads(LIDAR_DATA))
    dataset._catalog = Catalog.loads(CATALOG)
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()
    sensor_name = LIDAR_DATA["name"]
    for i in range(10):
        frame = Frame()
        file_path = sub_dir / f"hello{i}.txt"
        file_path.write_text("CONTENT")
        data = Data(local_path=str(file_path))
        data.label = Label.loads(LABEL)
        frame[sensor_name] = data
        segment.append(frame)
    ds_client = client.upload_dataset(dataset)
    segment_client = ds_client.move_segment("Segment1", "Segment2")
    assert segment_client.name == "Segment2"
    # An unknown strategy value is rejected.
    with pytest.raises(InvalidParamsError):
        ds_client.move_segment("Segment1", "Segment3", strategy="push")
    moved = FusionSegment("Segment2", client=ds_client)
    assert moved[0][sensor_name].path == "hello0.txt"
    assert moved[0][sensor_name].path == segment[0][sensor_name].target_remote_path
    assert moved[0][sensor_name].label
    client.delete_dataset(name)
def test_upload_dataset_only_with_file(self, accesskey, url, tmp_path):
    """Upload a dataset that has files but no catalog or labels."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    client.create_dataset(name)
    dataset = Dataset(name=name)
    dataset.notes.is_continuous = True
    segment = dataset.create_segment("Segment1")
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()
    for i in range(10):
        file_path = sub_dir / f"hello{i}.txt"
        file_path.write_text("CONTENT")
        segment.append(Data(local_path=str(file_path)))
    ds_client = client.upload_dataset(dataset)
    # Upload leaves the client on an uncommitted draft of the default branch.
    assert ds_client.status.branch_name == DEFAULT_BRANCH
    assert ds_client.status.draft_number
    assert not ds_client.status.commit_id
    assert ds_client.get_notes().is_continuous is True
    assert not ds_client.get_catalog()
    remote_segment = Segment("Segment1", client=ds_client)
    assert len(remote_segment) == 10
    assert remote_segment[0].path == "hello0.txt"
    assert not remote_segment[0].label
    client.delete_dataset(name)
def test_move_data_override(self, accesskey, url, tmp_path):
    """Move with strategy="override": the target file takes the source's content."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    client.create_dataset(name)
    dataset = Dataset(name=name)
    segment = dataset.create_segment("Segment1")
    dataset._catalog = Catalog.loads(CATALOG)
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()
    for i in range(10):
        file_path = sub_dir / f"hello{i}.txt"
        file_path.write_text(f"CONTENT_{i}")
        data = Data(local_path=str(file_path))
        data.label = Label.loads(LABEL)
        segment.append(data)
    ds_client = client.upload_dataset(dataset)
    segment_client = ds_client.get_segment("Segment1")
    segment_client.move_data("hello0.txt", "hello1.txt", strategy="override")
    segment_moved = Segment("Segment1", client=ds_client)
    for moved in segment_moved:
        # The source path is gone; the overridden target carries its content.
        assert moved.path != "hello0.txt"
        assert moved.label
        if moved.path == "hello1.txt":
            assert moved.open().read() == b"CONTENT_0"
    client.delete_dataset(name)
def test_upload_dataset_after_commit(self, accesskey, url, tmp_path):
    """Commit an uploaded dataset and compare the remote copy with the local one."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    client.create_dataset(name)
    dataset = Dataset(name=name)
    dataset._catalog = Catalog.loads(CATALOG)
    dataset.notes.is_continuous = True
    segment = dataset.create_segment("Segment1")
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()
    for i in range(10):
        file_path = sub_dir / f"hello{i}.txt"
        file_path.write_text("CONTENT")
        data = Data(local_path=str(file_path))
        data.label = Label.loads(LABEL)
        segment.append(data)
    ds_client = client.upload_dataset(dataset)
    ds_client.commit("test")
    dataset_remote = Dataset(name=name, gas=client)
    assert dataset_remote.notes.is_continuous == dataset.notes.is_continuous
    assert dataset_remote.catalog == dataset.catalog
    segment_remote = dataset_remote[0]
    assert len(segment_remote) == len(segment)
    # Remote paths and labels must match the local data one-to-one.
    for remote_data, local_data in zip(segment_remote, segment):
        assert remote_data.path == local_data.target_remote_path
        assert remote_data.label == local_data.label
    client.delete_dataset(name)
def test_upload_dataset_with_label(self, accesskey, url, tmp_path):
    """Upload ten labeled files and verify catalog and labels on the draft."""
    # NOTE(review): a method with this exact name appears earlier in this
    # module; pytest collects only one per class — confirm and deduplicate.
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    client.create_dataset(name)
    dataset = Dataset(name=name)
    segment = dataset.create_segment("Segment1")
    # The catalog must be in place before labels are accepted.
    dataset._catalog = Catalog.loads(CATALOG)
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()
    for i in range(10):
        file_path = sub_dir / f"hello{i}.txt"
        file_path.write_text("CONTENT")
        data = Data(local_path=str(file_path))
        data.label = Label.loads(LABEL)
        segment.append(data)
    ds_client = client.upload_dataset(dataset)
    assert ds_client.get_catalog()
    remote_segment = Segment("Segment1", client=ds_client)
    assert len(remote_segment) == 10
    assert remote_segment[0].path == "hello0.txt"
    assert remote_segment[0].label
    client.delete_dataset(name)
def test_move_segment_skip(self, accesskey, url, tmp_path):
    """Moving onto an existing segment with strategy="skip" keeps the target."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    client.create_dataset(name)
    dataset = Dataset(name=name)
    dataset._catalog = Catalog.loads(CATALOG)
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()

    def fill(target_segment, start, stop, content):
        # Append labeled files hello{start}..hello{stop-1} with the given content.
        for i in range(start, stop):
            file_path = sub_dir / f"hello{i}.txt"
            file_path.write_text(content)
            data = Data(local_path=str(file_path))
            data.label = Label.loads(LABEL)
            target_segment.append(data)

    dataset.create_segment("Segment1")
    fill(dataset["Segment1"], 0, 10, "CONTENT_1")
    segment2 = dataset.create_segment("Segment2")
    fill(segment2, 10, 20, "CONTENT_2")
    ds_client = client.upload_dataset(dataset)
    ds_client.move_segment("Segment1", "Segment2", strategy="skip")
    # Segment2 keeps its own files and contents untouched.
    segment_moved = Segment("Segment2", client=ds_client)
    assert segment_moved[0].path == "hello10.txt"
    assert segment_moved[0].path == segment2[0].target_remote_path
    assert segment_moved[0].open().read() == b"CONTENT_2"
    assert segment_moved[0].label
    client.delete_dataset(name)
def test_move_segment_abort(self, accesskey, url, tmp_path):
    """Moving onto an existing segment without a strategy aborts with an error."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    client.create_dataset(name)
    dataset = Dataset(name=name)
    dataset._catalog = Catalog.loads(CATALOG)
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()
    for segment_name in ("Segment1", "Segment2"):
        segment = dataset.create_segment(segment_name)
        for i in range(10):
            file_path = sub_dir / f"hello{i}.txt"
            file_path.write_text("CONTENT")
            data = Data(local_path=str(file_path))
            data.label = Label.loads(LABEL)
            segment.append(data)
    ds_client = client.upload_dataset(dataset)
    # The default strategy aborts the conflicting move server-side.
    with pytest.raises(InternalServerError):
        ds_client.move_segment("Segment1", "Segment2")
    client.delete_dataset(name)
def test_modify_catalog(self, accesskey, url):
    """Upload a catalog to a draft, read it back, and reject empty catalogs.

    Fix: the original uploaded ``Catalog.loads(CATALOG2)`` twice in a row;
    the redundant first upload is removed.
    """
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name)
    dataset_client.create_draft("draft-1")
    catalog_to_upload = Catalog.loads(CATALOG2)
    dataset_client.upload_catalog(catalog_to_upload)
    catalog_get = dataset_client.get_catalog()
    assert catalog_get == catalog_to_upload
    # Uploading an empty catalog is not allowed.
    catalog_to_upload = Catalog.loads({})
    with pytest.raises(InvalidParamsError):
        dataset_client.upload_catalog(catalog_to_upload)
    gas_client.delete_dataset(dataset_name)
def test_get_commit(self, accesskey, url):
    """Get commits by id and without an argument; a draft rejects get_commit()."""
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name)
    dataset_client.create_draft("draft-1")
    dataset_client.commit("commit-1", "test", tag="V1")
    commit_1_id = dataset_client.status.commit_id
    dataset_client.create_draft("draft-2")
    dataset_client.commit("commit-2")
    commit_2_id = dataset_client.status.commit_id
    # Get the top commit by its id
    commit = dataset_client.get_commit(commit_2_id)
    assert commit.commit_id == commit_2_id
    assert commit.parent_commit_id == commit_1_id
    assert commit.title == "commit-2"
    assert commit.description == ""
    assert commit.committer.name
    assert commit.committer.date
    # Get the commit one before the top one
    commit = dataset_client.get_commit(commit_1_id)
    assert commit.commit_id == commit_1_id
    assert commit.parent_commit_id == ROOT_COMMIT_ID
    assert commit.title == "commit-1"
    assert commit.description == "test"
    assert commit.committer.name
    assert commit.committer.date
    # If no commit is given, the current commit is returned
    commit = dataset_client.get_commit()
    assert commit.commit_id == commit_2_id
    assert commit.parent_commit_id == commit_1_id
    assert commit.title == "commit-2"
    assert commit.description == ""
    assert commit.committer.name
    assert commit.committer.date
    # get_commit() without an argument is not allowed while a draft is checked out
    dataset_client.create_draft("draft-3")
    with pytest.raises(StatusError):
        dataset_client.get_commit()
    gas_client.delete_dataset(dataset_name)
def test_upload_label_without_catalog(self, accesskey, url, tmp_path):
    """Uploading a label before any catalog exists must be rejected."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client = client.create_dataset(name)
    ds_client.create_draft("draft-1")
    segment_client = ds_client.get_or_create_segment("segment1")
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()
    file_path = sub_dir / "hello0.txt"
    file_path.write_text("CONTENT")
    data = Data(local_path=str(file_path))
    segment_client.upload_file(data.path, data.target_remote_path)
    data.label = Label.loads(LABEL)
    # Without a catalog the label upload is rejected by the server.
    with pytest.raises(ResponseError):
        segment_client.upload_label(data)
    client.delete_dataset(name)
def test_catalog_version_control(self, accesskey, url):
    """The catalog is carried from a committed draft into subsequent drafts."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client = client.create_dataset(name)
    uploaded = Catalog.loads(CATALOG1)
    ds_client.create_draft("draft-1")
    ds_client.upload_catalog(uploaded)
    # After committing, the catalog is readable from the commit.
    ds_client.commit("commit-1")
    assert ds_client.get_catalog() == uploaded
    # A new draft starts from that commit and inherits its catalog.
    ds_client.create_draft("draft-2")
    assert ds_client.get_catalog() == uploaded
    client.delete_dataset(name)
def test_create_branch_on_revision(self, accesskey, url):
    """Create branches from a commit id, a tag, and an existing branch name."""
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name)
    dataset_client.create_draft("draft-1")
    dataset_client.commit("commit-1", tag="V1")
    commit_id_1 = dataset_client.status.commit_id
    dataset_client.create_draft("draft-2")
    dataset_client.commit("commit-2", tag="V2")
    commit_id_2 = dataset_client.status.commit_id
    # Create a branch based on a commit id; the client checks out the new branch
    dataset_client.create_branch("Test_Commit_Id", revision=commit_id_1)
    assert dataset_client._dataset_id is not None
    assert dataset_client._status.commit_id == commit_id_1
    assert dataset_client._status.draft_number is None
    assert dataset_client._status.branch_name == "Test_Commit_Id"
    branch = dataset_client.get_branch("Test_Commit_Id")
    assert branch.name == "Test_Commit_Id"
    assert branch.commit_id == commit_id_1
    assert branch.parent_commit_id == ROOT_COMMIT_ID
    assert branch.title == "commit-1"
    # Create a branch based on a tag
    dataset_client.create_branch("Test_Tag", revision="V1")
    branch = dataset_client.get_branch("Test_Tag")
    assert branch.name == "Test_Tag"
    assert branch.commit_id == commit_id_1
    assert branch.parent_commit_id == ROOT_COMMIT_ID
    assert branch.title == "commit-1"
    # Create a branch based on another branch name
    dataset_client.create_branch("Test_Branch", revision=DEFAULT_BRANCH)
    branch = dataset_client.get_branch("Test_Branch")
    assert branch.name == "Test_Branch"
    assert branch.commit_id == commit_id_2
    assert branch.parent_commit_id == commit_id_1
    assert branch.title == "commit-2"
    gas_client.delete_dataset(dataset_name)
def test_create_dataset(self, accesskey, url):
    """A newly created dataset has an id and can be fetched by name."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client = client.create_dataset(name)
    assert ds_client.dataset_id is not None
    client.get_dataset(name)
    client.delete_dataset(name)
def test_list_dataset_names(self, accesskey, url):
    """A created dataset shows up in the dataset-name listing."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    client.create_dataset(name)
    assert name in client.list_dataset_names()
    client.delete_dataset(name)
def test_upload_dataset_to_given_branch(self, accesskey, url, tmp_path):
    """Upload to an explicit branch, then re-upload into its open draft."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client_1 = client.create_dataset(name)
    ds_client_1.create_draft("test")
    ds_client_1.commit("test1")
    ds_client_1.create_branch("dev")
    dataset = Dataset(name=name)
    segment = dataset.create_segment("Segment1")
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()

    def append_files(prefix):
        # Append ten unlabeled files named {prefix}0..{prefix}9.
        for i in range(10):
            file_path = sub_dir / f"{prefix}{i}.txt"
            file_path.write_text("CONTENT")
            segment.append(Data(local_path=str(file_path)))

    append_files("hello")
    ds_client_2 = client.upload_dataset(dataset, branch_name="dev")
    assert ds_client_2.status.branch_name == "dev"
    assert ds_client_2.status.draft_number
    assert not ds_client_2.status.commit_id
    remote_segment = Segment("Segment1", client=ds_client_2)
    assert len(remote_segment) == 10
    assert remote_segment[0].path == "hello0.txt"
    assert not remote_segment[0].label
    ds_client_2.commit("test2")
    draft_number = ds_client_2.create_draft("test2")
    append_files("goodbye")
    # A second upload reuses the branch's open draft instead of creating one.
    ds_client_2 = client.upload_dataset(dataset, branch_name="dev")
    assert ds_client_2.status.branch_name == "dev"
    assert ds_client_2.status.draft_number == draft_number
    assert not ds_client_2.status.commit_id
    # Uploading to a branch that does not exist fails.
    with pytest.raises(ResourceNotExistError):
        client.upload_dataset(dataset, branch_name="wrong")
    client.delete_dataset(name)
def test_notes(self, accesskey, url):
    """Notes can only be updated inside a draft; updates round-trip."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client = client.create_dataset(name)
    # Updating notes outside a draft is not allowed.
    with pytest.raises(StatusError):
        ds_client.update_notes(is_continuous=True)
    ds_client.create_draft("draft-1")
    origin = ds_client.get_notes()
    assert origin.is_continuous is False
    assert origin.bin_point_cloud_fields is None
    ds_client.update_notes(
        is_continuous=True, bin_point_cloud_fields=["X", "Y", "Z", "Intensity", "Ring"]
    )
    modified = ds_client.get_notes()
    assert modified.is_continuous is True
    assert modified.bin_point_cloud_fields == ["X", "Y", "Z", "Intensity", "Ring"]
    client.delete_dataset(name)
def test_sensor(self, accesskey, url):
    """Upload sensors, read them back, and delete one of them."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client = client.create_dataset(name, is_fusion=True)
    ds_client.create_draft("draft-1")
    segment_client = ds_client.get_or_create_segment("segment1")
    for sensor_data in SENSORS_DATA:
        segment_client.upload_sensor(Sensor.loads(sensor_data))
    assert segment_client.get_sensors() == Sensors.loads(SENSORS_DATA)
    segment_client.delete_sensor(SENSORS_DATA[0]["name"])
    remaining = segment_client.get_sensors()
    assert len(remaining) == 4
    assert remaining == Sensors.loads(SENSORS_DATA[1:])
    client.delete_dataset(name)
def test_upload_file(self, accesskey, url, tmp_path):
    """Upload files one by one and verify path, content, and absent labels."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client = client.create_dataset(name)
    ds_client.create_draft("draft-1")
    segment_client = ds_client.get_or_create_segment("segment1")
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()
    for i in range(5):
        file_path = sub_dir / f"hello{i}.txt"
        file_path.write_text("CONTENT")
        segment_client.upload_file(local_path=str(file_path))
    uploaded = segment_client.list_data()
    assert uploaded[0].path == "hello0.txt"
    assert uploaded[0].open().read() == b"CONTENT"
    assert not uploaded[0].label
    client.delete_dataset(name)
def test_cache_dataset(self, accesskey, url, tmp_path):
    """Enable the local cache and verify data and mask files are cached on read."""
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    gas_client.create_dataset(dataset_name)
    dataset = Dataset(name=dataset_name)
    segment = dataset.create_segment("Segment1")
    # When uploading label, upload catalog first.
    dataset._catalog = Catalog.loads(_CATALOG)
    path = tmp_path / "sub"
    semantic_path = tmp_path / "semantic_mask"
    instance_path = tmp_path / "instance_mask"
    path.mkdir()
    semantic_path.mkdir()
    instance_path.mkdir()
    for i in range(_SEGMENT_LENGTH):
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        data.label = Label.loads(_LABEL)
        semantic_mask = semantic_path / f"semantic_mask{i}.png"
        semantic_mask.write_text("SEMANTIC_MASK")
        data.label.semantic_mask = SemanticMask(str(semantic_mask))
        instance_mask = instance_path / f"instance_mask{i}.png"
        instance_mask.write_text("INSTANCE_MASK")
        data.label.instance_mask = InstanceMask(str(instance_mask))
        segment.append(data)
    dataset_client = gas_client.upload_dataset(dataset)
    dataset_client.commit("commit-1")
    cache_path = tmp_path / "cache_test"
    dataset_client.enable_cache(str(cache_path))
    segment1 = Segment("Segment1", client=dataset_client)
    # Opening each datum and its masks populates the cache as a side effect.
    for data in segment1:
        data.open()
        data.label.semantic_mask.open()
        data.label.instance_mask.open()
    segment_cache_path = (
        cache_path / dataset_client.dataset_id / dataset_client.status.commit_id / "Segment1"
    )
    semantic_mask_cache_path = segment_cache_path / "semantic_mask"
    instance_mask_cache_path = segment_cache_path / "instance_mask"
    # NOTE(review): the expected cache names use the data's remote stem
    # ("hello{i}") even for mask files, not the original mask file names —
    # presumably the cache keys masks by data path; confirm against the SDK.
    for cache_dir, extension in (
        (segment_cache_path, "txt"),
        (semantic_mask_cache_path, "png"),
        (instance_mask_cache_path, "png"),
    ):
        assert set(cache_dir.glob(f"*.{extension}")) == set(
            cache_dir / f"hello{i}.{extension}" for i in range(_SEGMENT_LENGTH)
        )
    gas_client.delete_dataset(dataset_name)
def test_upload_frame_without_sensor(self, accesskey, url, tmp_path):
    """Uploading a frame before any sensor exists must be rejected."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client = client.create_dataset(name, is_fusion=True)
    ds_client.create_draft("draft-1")
    segment_client = ds_client.get_or_create_segment("segment1")
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()
    file_path = sub_dir / "hello0.txt"
    file_path.write_text("CONTENT")
    frame = Frame()
    frame[LIDAR_DATA["name"]] = Data(local_path=str(file_path))
    # Without an uploaded sensor the frame upload is rejected by the server.
    with pytest.raises(ResponseError):
        segment_client.upload_frame(frame, timestamp=0)
    client.delete_dataset(name)
def test_copy_data_from_commits(self, accesskey, url, tmp_path):
    """Copy a single file from an earlier commit into a draft on the same dataset."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    client.create_dataset(name)
    dataset = Dataset(name=name)
    segment = dataset.create_segment("Segment1")
    dataset._catalog = Catalog.loads(CATALOG)
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()

    def add_files(start, stop):
        # Append labeled files hello{start}..hello{stop-1}.
        for i in range(start, stop):
            file_path = sub_dir / f"hello{i}.txt"
            file_path.write_text("CONTENT")
            data = Data(local_path=str(file_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

    add_files(0, 10)
    ds_client = client.upload_dataset(dataset)
    ds_client.commit("commit_1")
    add_files(10, 20)
    ds_client = client.upload_dataset(dataset)
    ds_client.commit("commit_2")
    source_ds_client = client.get_dataset(name)
    # The oldest commit in the history ("commit_1") is the copy source.
    first_commit_id = source_ds_client.list_commits()[-1].commit_id
    source_ds_client.checkout(revision=first_commit_id)
    ds_client.create_draft("draft_3")
    source_segment_client = source_ds_client.get_segment("Segment1")
    target_segment_client = ds_client.get_segment("Segment1")
    target_segment_client.copy_data(
        "hello0.txt", "goodbye0.txt", source_client=source_segment_client
    )
    result = Segment("Segment1", client=ds_client)
    assert result[0].path == "goodbye0.txt"
    assert result[0].path != segment[0].target_remote_path
    assert result[0].label
    assert len(result) == 21
    client.delete_dataset(name)
def test_cache_fusion_dataset(self, accesskey, url, tmp_path):
    """Enable the local cache on a fusion dataset and verify the cached files."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client = client.create_dataset(name, is_fusion=True)
    ds_client.create_draft("draft-1")
    segment = FusionSegment("Segment1")
    segment.sensors = Sensors.loads(_SENSORS_DATA)
    sensor_dirs = {"Lidar1": tmp_path / "lidar", "Camera1": tmp_path / "camera"}
    for directory in sensor_dirs.values():
        directory.mkdir()
    for i in range(_SEGMENT_LENGTH):
        frame = Frame()
        for sensor_data in _SENSORS_DATA:
            sensor_name = sensor_data["name"]
            data_path = sensor_dirs[sensor_name] / f"{sensor_name}{i}.txt"
            data_path.write_text("CONTENT")
            frame[sensor_name] = Data(local_path=str(data_path))
        segment.append(frame)
    ds_client.upload_segment(segment)
    ds_client.commit("commit-1")
    cache_path = tmp_path / "cache_test"
    ds_client.enable_cache(str(cache_path))
    # Opening every datum populates the cache as a side effect.
    remote_segment = FusionSegment(name="Segment1", client=ds_client)
    for frame in remote_segment:
        for data in frame.values():
            data.open()
    segment_cache_path = (
        cache_path / ds_client.dataset_id / ds_client.status.commit_id / "Segment1"
    )
    expected = {
        segment_cache_path / f'{sensor_data["name"]}{i}.txt'
        for i in range(_SEGMENT_LENGTH)
        for sensor_data in _SENSORS_DATA
    }
    assert set(segment_cache_path.glob("*.txt")) == expected
    client.delete_dataset(name)
def test_checkout(self, accesskey, url):
    """Check out commits by id, tag and branch, and drafts by number."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client = client.create_dataset(name)
    ds_client.create_draft("draft-1")
    ds_client.commit("commit-1", tag="V1")
    commit_1_id = ds_client.status.commit_id
    ds_client.create_draft("draft-2")
    ds_client.commit("commit-2")
    commit_2_id = ds_client.status.commit_id
    # Exactly one of revision / draft_number must be given.
    with pytest.raises(TypeError):
        ds_client.checkout()
    with pytest.raises(TypeError):
        ds_client.checkout(revision=commit_1_id, draft_number=3)
    ds_client.checkout(revision=commit_1_id)
    assert ds_client._status.branch_name is None
    assert ds_client._status.commit_id == commit_1_id
    # A nonexistent revision leaves the current checkout untouched.
    with pytest.raises(ResourceNotExistError):
        ds_client.checkout(revision="123")
    assert ds_client._status.commit_id == commit_1_id
    ds_client.checkout(revision="V1")
    assert ds_client._status.branch_name is None
    assert ds_client._status.commit_id == commit_1_id
    ds_client.checkout(revision=DEFAULT_BRANCH)
    assert ds_client._status.branch_name == DEFAULT_BRANCH
    assert ds_client._status.commit_id == commit_2_id
    ds_client.create_draft("draft-3")
    # A nonexistent draft number is rejected.
    with pytest.raises(ResourceNotExistError):
        ds_client.checkout(draft_number=2)
    ds_client.checkout(draft_number=3)
    assert ds_client._status.branch_name == DEFAULT_BRANCH
    assert ds_client._status.draft_number == 3
    client.delete_dataset(name)
def test_delete_data(self, accesskey, url, tmp_path):
    """Delete a single remote file and verify it disappears from the listing."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client = client.create_dataset(name)
    ds_client.create_draft("draft-1")
    ds_client.upload_catalog(Catalog.loads(BOX2D_CATALOG))
    segment_client = ds_client.get_or_create_segment("segment1")
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()
    for i in range(10):
        file_path = sub_dir / f"hello{i}.txt"
        file_path.write_text(f"CONTENT{i}")
        data = Data(local_path=str(file_path))
        data.label = Label.loads(LABEL)
        segment_client.upload_data(data)
    segment_client.delete_data("hello0.txt")
    assert "hello0.txt" not in segment_client.list_data_paths()
    client.delete_dataset(name)
def test_get_commit_by_revision(self, accesskey, url):
    """Get commits by tag and by branch name; unknown revisions raise."""
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name)
    dataset_client.create_draft("draft-1")
    dataset_client.commit("commit-1", "commit-1-description", tag="V1")
    commit_1_id = dataset_client.status.commit_id
    dataset_client.create_draft("draft-2")
    dataset_client.commit("commit-2")
    commit_2_id = dataset_client.status.commit_id
    # Get the tagged commit by its tag
    commit = dataset_client.get_commit("V1")
    assert commit.commit_id == commit_1_id
    assert commit.parent_commit_id == ROOT_COMMIT_ID
    assert commit.title == "commit-1"
    assert commit.description == "commit-1-description"
    assert commit.committer.name
    assert commit.committer.date
    # Get top commit by branch
    commit = dataset_client.get_commit(DEFAULT_BRANCH)
    assert commit.commit_id == commit_2_id
    assert commit.parent_commit_id == commit_1_id
    assert commit.title == "commit-2"
    assert commit.description == ""
    assert commit.committer.name
    assert commit.committer.date
    # The tag does not exist
    with pytest.raises(ResourceNotExistError):
        dataset_client.get_commit("V2")
    # The branch does not exist
    with pytest.raises(ResourceNotExistError):
        dataset_client.get_commit("main1")
    gas_client.delete_dataset(dataset_name)
def test_upload_segment_with_file(self, accesskey, url, tmp_path):
    """Upload a whole segment object and verify remote paths and URLs."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client = client.create_dataset(name)
    ds_client.create_draft("draft-1")
    segment = Segment("segment1")
    sub_dir = tmp_path / "sub"
    sub_dir.mkdir()
    for i in range(10):
        file_path = sub_dir / f"hello{i}.txt"
        file_path.write_text("CONTENT")
        segment.append(Data(local_path=str(file_path)))
    ds_client.upload_segment(segment)
    remote_segment = Segment(name="segment1", client=ds_client)
    assert len(remote_segment) == 10
    assert remote_segment[0].get_url()
    assert remote_segment[0].path == segment[0].target_remote_path
    client.delete_dataset(name)
def test_delete_branch(self, accesskey, url):
    """Delete a non-current branch and verify its commits survive.

    Fix: added the trailing ``delete_dataset`` cleanup that every sibling
    test performs — the original leaked the test dataset.
    """
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name)
    dataset_client.create_draft("draft-1")
    dataset_client.commit("commit-1")
    dataset_client.create_branch("T123")
    dataset_client.create_draft("draft-2")
    dataset_client.commit("commit-2")
    commit_2_id = dataset_client.status.commit_id
    # Deleting the currently checked-out branch is not allowed.
    with pytest.raises(StatusError):
        dataset_client.delete_branch("T123")
    dataset_client.checkout(revision=DEFAULT_BRANCH)
    dataset_client.delete_branch("T123")
    with pytest.raises(ResourceNotExistError):
        dataset_client.get_branch("T123")
    # Deleting a branch does not delete the commits that were on it.
    dataset_client.get_commit(commit_2_id)
    gas_client.delete_dataset(dataset_name)
def test_create_branch(self, accesskey, url):
    """Create a branch from the current commit and verify its metadata."""
    client = GAS(access_key=accesskey, url=url)
    name = get_dataset_name()
    ds_client = client.create_dataset(name)
    # A branch cannot be created while the dataset has no commit.
    with pytest.raises(ForbiddenError):
        ds_client.create_branch("T123")
    ds_client.create_draft("draft-1")
    ds_client.commit("commit-1", "test", tag="V1")
    commit_1_id = ds_client.status.commit_id
    ds_client.create_branch("T123")
    branch = ds_client.get_branch("T123")
    assert branch.commit_id == commit_1_id
    assert branch.parent_commit_id == ROOT_COMMIT_ID
    assert branch.title == "commit-1"
    assert branch.description == "test"
    assert branch.committer.name
    assert branch.committer.date
    client.delete_dataset(name)