def _upload_segment(
    self,
    segment: Segment,
    *,
    jobs: int = 1,
    skip_uploaded_files: bool = False,
    pbar: Tqdm,
) -> SegmentClient:
    """Create (or fetch) the remote segment and upload the segment's local data.

    Data that is already remote (``RemoteData``) is never uploaded; when
    *skip_uploaded_files* is True, data whose target remote path is already
    present on the server is skipped as well.  Every skipped item still
    advances the progress bar through ``pbar.update_for_skip``.

    Arguments:
        segment: The local segment whose data should be uploaded.
        jobs: Max worker count for the multi-threaded upload.
        skip_uploaded_files: True to skip files the server already has.
        pbar: Progress bar shared across the whole upload.

    Returns:
        The segment client bound to the created/fetched remote segment.
    """
    client = self.get_or_create_segment(segment.name)
    # Keep only local data; the predicate returns True (keep) for non-remote
    # items and bumps the progress bar for the remote ones it drops.
    local_data: Iterator[Union[AuthData, Data]] = filter(
        lambda item: pbar.update_for_skip(not isinstance(item, RemoteData)),
        segment,  # type: ignore[arg-type]
    )
    if skip_uploaded_files:
        uploaded_paths = set(client.list_data_paths())
        upload_source: Iterator[Union[AuthData, Data]] = filter(
            lambda item: pbar.update_for_skip(
                item.target_remote_path not in uploaded_paths
            ),
            local_data,
        )
    else:
        upload_source = local_data
    multithread_upload(
        client._upload_or_import_data,  # pylint: disable=protected-access
        upload_source,
        callback=client._synchronize_upload_info,  # pylint: disable=protected-access
        jobs=jobs,
        pbar=pbar,
    )
    return client
def _extract_all_data(
    source_frames: Iterator[Tuple[Frame, ULID]],
    pbar: Tqdm,
) -> FrameDataGenerator:
    """Yield every local piece of data contained in *source_frames*.

    Yields:
        ``(data, sensor_name, frame_id_str)`` for each non-remote data entry;
        remote entries are not yielded and only advance the progress bar.
    """
    for frame, frame_id in source_frames:
        for sensor_name, data in frame.items():
            if isinstance(data, RemoteData):
                # Already on the server: nothing to extract, just tick the bar.
                pbar.update()
            else:
                yield data, sensor_name, frame_id.str
def test__extract_all_data(self):
    """_extract_all_data should yield every datum with its sensor and frame id."""
    source_frames = []
    ulids = []
    for i in range(5):
        frame = Frame()
        # Alternate sensors so the extraction order can be verified per index.
        sensor = "camera" if i % 2 == 0 else "lidar"
        frame[sensor] = Data(f"{i}.png")
        ulid = from_timestamp(10 * i + 10)
        frame.frame_id = ulid
        source_frames.append((frame, ulid))
        ulids.append(ulid)
    with Tqdm(5, disable=False) as pbar:
        extracted = self.fusion_dataset_client._extract_all_data(
            source_frames, pbar)
        for index, (data, sensor_name, frame_id) in enumerate(extracted):
            assert data.path == f"{index}.png"
            expected_sensor = "camera" if index % 2 == 0 else "lidar"
            assert sensor_name == expected_sensor
            assert frame_id == ulids[index].str
def test__extract_unuploaded_data(self):
    """Only data absent from *done_frames* should be extracted."""
    source_frames = []
    lidar_ulids = []
    done_frames = {}
    for i in range(5):
        frame = Frame()
        ulid = from_timestamp(10 * i + 10)
        if i % 2 == 0:
            # Even frames are already on the server: register them as done.
            frame["camera"] = Data(f"{i}.png")
            done_frames[ulid.timestamp().timestamp] = frame
        else:
            frame["lidar"] = Data(f"{i}.png")
            lidar_ulids.append(ulid)
        frame.frame_id = ulid
        source_frames.append((frame, ulid))
    with Tqdm(5, disable=False) as pbar:
        remaining = self.fusion_dataset_client._extract_unuploaded_data(
            source_frames, pbar, done_frames=done_frames
        )
        # Only the odd (lidar) frames survive the done-frame filter.
        for index, (data, sensor_name, frame_id) in enumerate(remaining):
            assert data.path == f"{index * 2 + 1}.png"
            assert sensor_name == "lidar"
            assert frame_id == lidar_ulids[index].str
def test__upload_segment(self, mocker):
    """_upload_segment should drop server-side paths only when skipping is on."""
    local_segment = Segment(name="test1")
    for i in range(5):
        local_segment.append(Data(f"data{i}.png"))
    segment_client = SegmentClient(name="test1", data_client=self.dataset_client)
    get_or_create_segment = mocker.patch(
        f"{dataset.__name__}.DatasetClient.get_or_create_segment",
        return_value=segment_client,
    )
    list_data_paths = mocker.patch(
        f"{segment.__name__}.SegmentClient.list_data_paths",
        return_value=["data1.png", "data2.png"],
    )
    multithread_upload = mocker.patch(f"{dataset.__name__}.multithread_upload")

    # skip_uploaded_files=True: paths reported by the server are filtered out.
    with Tqdm(5, disable=False) as pbar:
        self.dataset_client._upload_segment(
            local_segment, skip_uploaded_files=True, pbar=pbar)
        get_or_create_segment.assert_called_once_with(local_segment.name)
        list_data_paths.assert_called_once_with()
        positional, named = multithread_upload.call_args
        assert positional[0] == segment_client._upload_or_import_data
        uploaded = [item.path for item in positional[1]]
        assert uploaded == ["data0.png", "data3.png", "data4.png"]
        assert named["callback"] == segment_client._synchronize_upload_info
        assert named["jobs"] == 1
        assert named["pbar"] == pbar
        multithread_upload.assert_called_once()

    # skip_uploaded_files=False: every datum is scheduled for upload.
    with Tqdm(5, disable=False) as pbar:
        self.dataset_client._upload_segment(
            local_segment, skip_uploaded_files=False, pbar=pbar)
        get_or_create_segment.assert_called_with(local_segment.name)
        list_data_paths.assert_called_with()
        positional, named = multithread_upload.call_args
        assert positional[0] == segment_client._upload_or_import_data
        uploaded = [item.path for item in positional[1]]
        assert uploaded == [f"data{i}.png" for i in range(5)]
        assert named["callback"] == segment_client._synchronize_upload_info
        assert named["jobs"] == 1
        assert named["pbar"] == pbar
        multithread_upload.assert_called()
def _extract_unuploaded_data(
    source_frames: Iterator[Tuple[Frame, ULID]],
    pbar: Tqdm,
    *,
    done_frames: Dict[float, Frame]) -> FrameDataGenerator:
    """Yield only the data in *source_frames* that is not on the server yet.

    A source frame is matched against *done_frames* by its timestamp.  Data
    whose sensor already exists in the matching done frame with the same
    remote path is treated as uploaded and skipped; remote data is always
    skipped.  Skips advance the progress bar.

    Yields:
        ``(data, sensor_name, frame_id_str)`` for each datum to upload.
    """
    for frame, frame_id in source_frames:
        uploaded = done_frames.get(frame_id.timestamp().timestamp)
        if uploaded:
            # Reuse the server-side frame id so new data joins that frame.
            frame_id = uploaded.frame_id
        for sensor_name, data in frame.items():
            if isinstance(data, RemoteData):
                pbar.update()
            elif (uploaded and sensor_name in uploaded
                    and uploaded[sensor_name].path == data.target_remote_path):
                # Same sensor and remote path already uploaded: skip it.
                pbar.update()
            else:
                yield data, sensor_name, frame_id.str
def upload_segment(
    self,
    segment: Segment,
    *,
    jobs: int = 1,
    skip_uploaded_files: bool = False,
    quiet: bool = False,
    _is_cli: bool = False,
) -> SegmentClient:
    """Upload a :class:`~tensorbay.dataset.segment.Segment` to the dataset.

    Everything contained in the input
    :class:`~tensorbay.dataset.segment.Segment` is uploaded:

        - A segment named after the input Segment is created.
        - All Data inside the Segment is uploaded to the dataset.

    Arguments:
        segment: The :class:`~tensorbay.dataset.segment.Segment` holding the
            information that needs to be uploaded.
        jobs: The number of the max workers in multi-thread uploading method.
        skip_uploaded_files: True for skipping the uploaded files.
        quiet: Set to True to stop showing the upload process bar.
        _is_cli: Whether the method is called by CLI.

    Raises:
        Exception: When the upload got interrupted by Exception.

    Returns:
        The :class:`~tensorbay.client.segment.SegmentClient`
        used for uploading the data in the segment.
    """
    self._status.check_authority_for_draft()
    try:
        with Tqdm(len(segment), disable=quiet) as pbar:
            return self._upload_segment(
                segment,
                jobs=jobs,
                skip_uploaded_files=skip_uploaded_files,
                pbar=pbar,
            )
    except Exception:
        # Tell the user how to resume; the CLI and SDK hints differ in shape.
        draft_number = self._status.draft_number
        if _is_cli:
            logger.error(UPLOAD_SEGMENT_RESUME_TEMPLATE_CLI, draft_number)
        else:
            logger.error(
                UPLOAD_SEGMENT_RESUME_TEMPLATE_SDK, draft_number, draft_number
            )
        raise
def upload_segment(
    self,
    segment: FusionSegment,
    *,
    jobs: int = 1,
    skip_uploaded_files: bool = False,
    quiet: bool = False,
) -> FusionSegmentClient:
    """Upload a fusion segment object to the draft.

    Everything contained in the input
    :class:`~tensorbay.dataset.segment.FusionSegment` is uploaded:

        - A segment named after the input fusion segment object is created.
        - All sensors in the segment are uploaded to the dataset.
        - All frames in the segment are uploaded to the dataset.

    Arguments:
        segment: The :class:`~tensorbay.dataset.segment.FusionSegment`.
        jobs: The number of the max workers in multi-thread upload.
        skip_uploaded_files: Set it to True to skip the uploaded files.
        quiet: Set to True to stop showing the upload process bar.

    Raises:
        Exception: When the upload got interrupted by Exception.

    Returns:
        The :class:`~tensorbay.client.segment.FusionSegmentClient`
        used for uploading the data in the segment.
    """
    self._status.check_authority_for_draft()
    try:
        # The progress bar counts individual data entries, not frames.
        data_count = sum(len(frame) for frame in segment)
        with Tqdm(data_count, disable=quiet) as pbar:
            return self._upload_segment(
                segment,
                jobs=jobs,
                skip_uploaded_files=skip_uploaded_files,
                pbar=pbar,
            )
    except Exception:
        draft_number = self._status.draft_number
        logger.error(UPLOAD_SEGMENT_RESUME_TEMPLATE_SDK, draft_number, draft_number)
        raise
def upload_dataset(
    self,
    dataset: Union[Dataset, FusionDataset],
    draft_number: Optional[int] = None,
    *,
    branch_name: Optional[str] = None,
    jobs: int = 1,
    skip_uploaded_files: bool = False,
    quiet: bool = False,
) -> DatasetClientType:
    """Upload a local dataset to TensorBay.

    This function will upload all information contains
    in the :class:`~tensorbay.dataset.dataset.Dataset`
    or :class:`~tensorbay.dataset.dataset.FusionDataset`, which includes:

        - Create a TensorBay dataset with the name and type of input local dataset.
        - Upload all :class:`~tensorbay.dataset.segment.Segment`
          or :class:`~tensorbay.dataset.segment.FusionSegment` in the dataset to TensorBay.

    Arguments:
        dataset: The :class:`~tensorbay.dataset.dataset.Dataset` or
            :class:`~tensorbay.dataset.dataset. FusionDataset` needs to be uploaded.
        draft_number: The draft number.
        branch_name: The branch name.
        jobs: The number of the max workers in multi-thread upload.
        skip_uploaded_files: Set it to True to skip the uploaded files.
        quiet: Set to True to stop showing the upload process bar.

    Returns:
        The :class:`~tensorbay.client.dataset.DatasetClient` or
        :class:`~tensorbay.client.dataset.FusionDatasetClient`
        bound with the uploaded dataset.

    Raises:
        ValueError: When uploading the dataset based on both draft number
            and branch name is not allowed.
        Exception: When Exception was raised during uploading dataset.

    """
    # The dataset type (fusion or not) picks the client class returned.
    dataset_client = self.get_dataset(dataset.name,
                                      isinstance(dataset, FusionDataset))
    if draft_number and branch_name:
        raise ValueError(
            "Uploading the dataset based on both draft number and branch name is not allowed"
        )
    if draft_number:
        dataset_client.checkout(draft_number=draft_number)
    else:
        # No explicit draft: resume the first open draft on the target
        # branch, or create a fresh autogenerated one.
        target_branch_name = branch_name if branch_name else dataset_client.status.branch_name
        drafts = dataset_client.list_drafts(branch_name=target_branch_name)
        if drafts:
            dataset_client.checkout(draft_number=drafts[0].number)
        else:
            dataset_client.create_draft(
                'Draft autogenerated by "GAS.upload_dataset"',
                branch_name=target_branch_name)
    try:
        if dataset.catalog:
            dataset_client.upload_catalog(dataset.catalog)
        dataset_client.update_notes(
            **dataset.notes)  # type: ignore[arg-type]
        # Progress total: data entries for a plain Dataset, per-frame data
        # entries summed across segments for a FusionDataset.
        if isinstance(dataset, Dataset):
            data_count = sum(len(segment) for segment in dataset)
        else:
            data_count = sum(
                sum(len(frame) for frame in fusion_segment)
                for fusion_segment in dataset)
        with Tqdm(data_count, disable=quiet) as pbar:
            for segment in dataset:
                dataset_client._upload_segment(  # pylint: disable=protected-access
                    segment,  # type: ignore[arg-type]
                    jobs=jobs,
                    skip_uploaded_files=skip_uploaded_files,
                    pbar=pbar,
                )
    except Exception:
        # NOTE(review): the resume hint is only logged when an explicit
        # draft_number was supplied; branch-based uploads re-raise without
        # it even though a draft exists — confirm this is intentional.
        if draft_number:
            logger.error(
                UPLOAD_DATASET_RESUME_TEMPLATE,
                dataset_client.status.draft_number,
                dataset_client.status.draft_number,
            )
        raise
    return dataset_client
def test__upload_segment(self, mocker):
    """_upload_segment should pick the extractor matching skip_uploaded_files."""
    fusion_segment = FusionSegment(name="test1")
    ulids = []
    uploaded_frames = []
    for i in range(5):
        frame = Frame()
        ulid = from_timestamp(10 * i + 10)
        frame.frame_id = ulid
        if i % 2 == 0:
            # Even frames count as already uploaded on the server side.
            frame["camera"] = Data(f"{i}.png")
            uploaded_frames.append(frame)
        else:
            frame["lidar"] = Data(f"{i}.png")
        ulids.append(ulid)
        fusion_segment.append(frame)
    segment_client = FusionSegmentClient(
        name="test1", data_client=self.fusion_dataset_client)
    get_or_create_segment = mocker.patch(
        f"{dataset.__name__}.FusionDatasetClient.get_or_create_segment",
        return_value=segment_client,
    )
    list_frames = mocker.patch(
        f"{segment.__name__}.FusionSegmentClient.list_frames",
        return_value=uploaded_frames,
    )
    multithread_upload = mocker.patch(f"{dataset.__name__}.multithread_upload")

    # skip_uploaded_files=True: only the lidar data of odd frames remains.
    with Tqdm(5, disable=False) as pbar:
        self.fusion_dataset_client._upload_segment(
            fusion_segment, jobs=8, skip_uploaded_files=True, pbar=pbar)
        get_or_create_segment.assert_called_once_with(fusion_segment.name)
        list_frames.assert_called_once_with()
        positional, named = multithread_upload.call_args
        for index, (data, sensor_name, frame_id) in enumerate(positional[1]):
            assert data.path == f"{index * 2 + 1}.png"
            assert sensor_name == "lidar"
            assert frame_id == ulids[index * 2 + 1].str
        assert named["callback"] == segment_client._synchronize_upload_info
        assert named["jobs"] == 8
        assert named["pbar"] == pbar
        multithread_upload.assert_called_once()

    # skip_uploaded_files=False: every datum is uploaded in frame order.
    with Tqdm(5, disable=False) as pbar:
        self.fusion_dataset_client._upload_segment(
            fusion_segment, jobs=8, skip_uploaded_files=False, pbar=pbar)
        get_or_create_segment.assert_called_with(fusion_segment.name)
        list_frames.assert_called_with()
        positional, named = multithread_upload.call_args
        for index, (data, sensor_name, frame_id) in enumerate(positional[1]):
            assert data.path == f"{index}.png"
            expected_sensor = "camera" if index % 2 == 0 else "lidar"
            assert sensor_name == expected_sensor
            assert frame_id == ulids[index].str
        assert named["callback"] == segment_client._synchronize_upload_info
        assert named["jobs"] == 8
        assert named["pbar"] == pbar