def test_upload_frame_with_order(self, accesskey, url, tmp_path):
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name, is_fusion=True)
    dataset_client.create_draft("draft-1")
    segment_client = dataset_client.get_or_create_segment("segment1")
    segment_client.upload_sensor(Sensor.loads(LIDAR_DATA))
    path = tmp_path / "sub"
    path.mkdir()

    # If the frame id is not set in the frame, set the timestamp (order) when uploading
    for i in reversed(range(5)):
        frame = Frame()
        local_path = path / f"hello{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        frame[LIDAR_DATA["name"]] = data
        segment_client.upload_frame(frame, timestamp=i)

    # Set the frame id in the frame
    for i in range(5, 10):
        frame = Frame(frame_id=ulid.from_timestamp(i))
        local_path = path / f"goodbye{i}.txt"
        local_path.write_text("CONTENT")
        data = Data(local_path=str(local_path))
        frame[LIDAR_DATA["name"]] = data
        segment_client.upload_frame(frame)

    # Setting both the frame id in the frame and the timestamp (order) when uploading is not allowed
    i = 10
    frame = Frame(frame_id=ulid.from_timestamp(i))
    local_path = path / f"goodbye{i}.txt"
    local_path.write_text("CONTENT")
    data = Data(local_path=str(local_path))
    frame[LIDAR_DATA["name"]] = data
    with pytest.raises(FrameError):
        segment_client.upload_frame(frame, timestamp=i)

    # Setting neither the frame id in the frame nor the timestamp (order) when uploading is not allowed either
    frame = Frame()
    local_path = path / f"goodbye{i}.txt"
    local_path.write_text("CONTENT")
    data = Data(local_path=str(local_path))
    frame[LIDAR_DATA["name"]] = data
    with pytest.raises(FrameError):
        segment_client.upload_frame(frame)

    frames = segment_client.list_frames()
    assert len(frames) == 10
    assert frames[0][LIDAR_DATA["name"]].path == "hello0.txt"
    assert frames[5][LIDAR_DATA["name"]].path == "goodbye5.txt"
    assert not frames[0][LIDAR_DATA["name"]].label
    # TODO: match the input and output label

    gas_client.delete_dataset(dataset_name)
def test_upload_segment(self, mocker):
    self.fusion_dataset_client._status.checkout(draft_number=1)
    segment_test = FusionSegment(name="test1")
    for i in range(5):
        temp_frame = Frame()
        temp_ulid = from_timestamp(10 * i + 10)
        temp_frame.frame_id = temp_ulid
        if i % 2 == 0:
            temp_frame["camera"] = Data(f"{i}.png")
        else:
            temp_frame["lidar"] = Data(f"{i}.png")
        segment_test.append(temp_frame)

    segment_client = FusionSegmentClient(name="test1", data_client=self.fusion_dataset_client)
    upload_segment = mocker.patch(
        f"{dataset.__name__}.FusionDatasetClient._upload_segment",
        return_value=segment_client,
    )

    assert self.fusion_dataset_client.upload_segment(segment_test).name == "test1"
    args, keywords = upload_segment.call_args
    assert args[0] == segment_test
    assert keywords["jobs"] == 1
    assert not keywords["skip_uploaded_files"]
    upload_segment.assert_called_once()
def test__extract_all_data(self):
    source_frames = []
    ulids = []
    for i in range(5):
        temp_frame = Frame()
        if i % 2 == 0:
            temp_frame["camera"] = Data(f"{i}.png")
        else:
            temp_frame["lidar"] = Data(f"{i}.png")
        temp_ulid = from_timestamp(10 * i + 10)
        temp_frame.frame_id = temp_ulid
        source_frames.append((temp_frame, temp_ulid))
        ulids.append(temp_ulid)

    with Tqdm(5, disable=False) as pbar:
        for index, values in enumerate(
            self.fusion_dataset_client._extract_all_data(source_frames, pbar)
        ):
            data, sensor_name, frame_id = values
            assert data.path == f"{index}.png"
            if index % 2 == 0:
                assert sensor_name == "camera"
            else:
                assert sensor_name == "lidar"
            assert frame_id == ulids[index].str
def test__extract_unuploaded_data(self):
    source_frames = []
    ulids = []
    done_frames = {}
    for i in range(5):
        temp_frame = Frame()
        temp_ulid = from_timestamp(10 * i + 10)
        if i % 2 == 0:
            temp_frame["camera"] = Data(f"{i}.png")
            done_frames[temp_ulid.timestamp().timestamp] = temp_frame
        else:
            temp_frame["lidar"] = Data(f"{i}.png")
            ulids.append(temp_ulid)
        temp_frame.frame_id = temp_ulid
        source_frames.append((temp_frame, temp_ulid))

    with Tqdm(5, disable=False) as pbar:
        for index, values in enumerate(
            self.fusion_dataset_client._extract_unuploaded_data(
                source_frames, pbar, done_frames=done_frames
            )
        ):
            data, sensor_name, frame_id = values
            assert data.path == f"{index * 2 + 1}.png"
            assert sensor_name == "lidar"
            assert frame_id == ulids[index].str
def create_id_from_datetime(timestamp: datetime) -> str:
    """Create an ID from an existing datetime.

    Args:
        timestamp (datetime): The time to timestamp the ID.

    Returns:
        (str): A unique lexicographic ID.
    """
    return ulid.from_timestamp(timestamp).str.lower()
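A minimal usage sketch (assuming the `ulid-py` package backing `ulid.from_timestamp` above): because a ULID encodes its timestamp in the most significant characters, IDs created from later datetimes sort lexicographically after earlier ones, and lowercasing preserves that order since Crockford base32 is monotonic in ASCII.

from datetime import datetime, timedelta

earlier = create_id_from_datetime(datetime(2021, 1, 1))
later = create_id_from_datetime(datetime(2021, 1, 1) + timedelta(seconds=1))
assert earlier < later  # lexicographic order follows creation time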
def create_id_from_datetime(dt):
    """Create an ID from an existing datetime.

    Args:
        dt (datetime): The time to timestamp the ID.

    Returns:
        (str): A unique lexicographic ID.
    """
    return ulid.from_timestamp(dt).str
def _upload_segment(
    self,
    segment: FusionSegment,
    *,
    jobs: int,
    skip_uploaded_files: bool,
    pbar: Tqdm,
) -> FusionSegmentClient:
    segment_client = self.get_or_create_segment(segment.name)
    for sensor in segment.sensors:
        segment_client.upload_sensor(sensor)

    if not segment:
        return segment_client

    have_frame_id = hasattr(segment[0], "frame_id")
    for frame in segment:
        if hasattr(frame, "frame_id") != have_frame_id:
            raise FrameError(
                "All the frames should follow the same pattern (either all have a frame id or none do)."
            )

    if have_frame_id:
        source_frames = ((frame, frame.frame_id) for frame in segment)
    else:
        source_frames = (
            (frame, from_timestamp(10 * index + 10)) for index, frame in enumerate(segment)
        )

    if not skip_uploaded_files:
        data_to_upload = FusionDatasetClient._extract_all_data(source_frames, pbar)
    else:
        done_frames: Dict[float, Frame] = {
            frame.frame_id.timestamp().timestamp: frame
            for frame in segment_client.list_frames()
        }
        data_to_upload = FusionDatasetClient._extract_unuploaded_data(
            source_frames, pbar, done_frames=done_frames
        )

    multithread_upload(  # pylint: disable=protected-access
        lambda args: segment_client._upload_or_import_data(*args),
        data_to_upload,
        callback=segment_client._synchronize_upload_info,
        jobs=jobs,
        pbar=pbar,
    )
    return segment_client
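A small illustration of the `done_frames` key used above (a sketch assuming `ulid-py` semantics): `frame_id.timestamp().timestamp` recovers the epoch seconds encoded in the ULID, which is what `_extract_unuploaded_data` matches against.

import ulid

# ulid-py stores milliseconds internally; .timestamp().timestamp yields
# the same instant back as float epoch seconds.
frame_id = ulid.from_timestamp(42.0)
assert frame_id.timestamp().timestamp == 42.0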
def upload_frame(self, frame: Frame, timestamp: Optional[float] = None) -> None:
    """Upload frame to the draft.

    Arguments:
        frame: The :class:`~tensorbay.dataset.frame.Frame` to upload.
        timestamp: The mark to sort frames, supporting timestamp and float.

    Raises:
        FrameError: When the frame id is missing or conflicts with the given timestamp.

    """
    self._status.check_authority_for_draft()

    if timestamp is None:
        try:
            frame_id = frame.frame_id
        except AttributeError as error:
            raise FrameError(
                "Missing frame id, please set the frame id in the frame "
                "or pass a timestamp to the function!"
            ) from error
    elif not hasattr(frame, "frame_id"):
        frame_id = from_timestamp(timestamp)
    else:
        raise FrameError(
            "Frame id conflicts, please do not pass a timestamp to the function!"
        )

    callback_bodies = []
    for sensor_name, data in frame.items():
        try:
            callback_body = data.get_callback_body()  # type: ignore[union-attr]
        except AttributeError:
            continue
        callback_body["frameId"] = frame_id.str
        callback_body["sensorName"] = sensor_name
        if isinstance(data, Data):
            self._upload_file(data)
            self._upload_mask_files(data.label)
            callback_bodies.append(callback_body)
        elif isinstance(data, AuthData):
            self._synchronize_import_info((callback_body,))

    for chunked_callback_bodies in chunked(callback_bodies, 50):
        self._synchronize_upload_info(chunked_callback_bodies)
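A hedged usage sketch (names such as `Frame` and `segment_client` taken from the surrounding snippets, values hypothetical): a frame is ordered either by a ULID frame id set on the frame itself or by a timestamp passed to `upload_frame`, never both.

from ulid import from_timestamp

# Order by an explicit frame id...
frame = Frame(frame_id=from_timestamp(0))
segment_client.upload_frame(frame)

# ...or by a timestamp at upload time; combining the two raises FrameError.
frame = Frame()
segment_client.upload_frame(frame, timestamp=1)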
def __init__(self, identifier, name, created_by, created_on):
    if created_on is None:
        created_on = arrow.utcnow()
    self.created_on = get_date(created_on)
    if identifier is None:
        self.identifier = ulid.from_timestamp(self.created_on.datetime).str
    else:
        self.identifier = identifier
    self.id = self.identifier
    self.name = name
    self.created_by = created_by
    self.events = []
    self.type = 'application'
def __init__(self, identifier, name, created_by, created_on, parent_app_id):
    if created_on is None:
        created_on = arrow.utcnow()
    self.created_on = get_date(created_on)
    if identifier is None:
        self.identifier = ulid.from_timestamp(self.created_on.datetime).str
    else:
        self.identifier = identifier
    self.id = self.identifier
    self.name = name
    self.created_by = created_by
    self.parent_app_id = parent_app_id
    self.parent_app = None
    self.type = 'event'
def getRecentJobs(event, context):
    '''
    Example body:
    {
        "site": "wmd",
        "timeRange": "<number of milliseconds>"
    }
    '''
    params = json.loads(event.get("body", ""))
    table = dynamodb.Table(os.environ['DYNAMODB_JOBS'])

    site = params['site']
    aDay = 24 * 3600 * 1000  # ms in a day (default value)
    timeRange = params.get('timeRange', aDay) / 1000  # convert to seconds

    now = time.time()  # timestamp in seconds
    earliest = now - timeRange
    earliestUlid = ulid.from_timestamp(earliest)

    table_response = table.query(
        KeyConditionExpression=Key('site').eq(site) & Key('ulid').gte(earliestUlid.str)
    )
    return get_response(
        HTTPStatus.OK,
        json.dumps(table_response['Items'], indent=4, cls=DecimalEncoder),
    )
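A hypothetical invocation sketch: querying jobs for site `wmd` over the last hour, relying on the ULID range key being lexicographically ordered by time.

# Assumes DYNAMODB_JOBS points at a table keyed by (site, ulid).
event = {"body": json.dumps({"site": "wmd", "timeRange": 3600 * 1000})}
response = getRecentJobs(event, None)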
def upload_frame(  # pylint: disable=too-many-locals
    self,
    frame: Frame,
    timestamp: Optional[float] = None,
    skip_uploaded_files: bool = False,
) -> None:
    """Upload frame to the draft.

    Arguments:
        frame: The :class:`~tensorbay.dataset.frame.Frame` to upload.
        timestamp: The mark to sort frames, supporting timestamp and float.
        skip_uploaded_files: Set it to True to skip the uploaded files.

    Raises:
        FrameError: When the frame id is missing or conflicts with the given timestamp.
        InvalidParamsError: When remote_path does not follow linux style.

    """
    self._status.check_authority_for_draft()

    if timestamp is None:
        try:
            frame_id = frame.frame_id
        except AttributeError as error:
            raise FrameError(
                "Missing frame id, please set the frame id in the frame "
                "or pass a timestamp to the function!"
            ) from error
    elif not hasattr(frame, "frame_id"):
        frame_id = from_timestamp(timestamp)
    else:
        raise FrameError(
            "Frame id conflicts, please do not pass a timestamp to the function!"
        )

    for sensor_name, data in frame.items():
        if not isinstance(data, Data):
            continue

        target_remote_path = data.target_remote_path
        if "\\" in target_remote_path:
            raise InvalidParamsError(param_name="path", param_value=target_remote_path)

        permission = self._get_upload_permission()
        post_data = permission["result"]
        checksum = self._calculate_file_sha1(data.path)
        post_data["key"] = checksum

        backend_type = permission["extra"]["backendType"]
        if backend_type == "azure":
            url = (
                f'{permission["extra"]["host"]}{permission["extra"]["objectPrefix"]}'
                f'{target_remote_path}?{permission["result"]["token"]}'
            )
            self._put_binary_file_to_azure(url, data.path, post_data)
        else:
            self._post_multipart_formdata(
                permission["extra"]["host"],
                data.path,
                target_remote_path,
                post_data,
            )

        frame_info: Dict[str, Any] = {
            "segmentName": self._name,
            "sensorName": sensor_name,
            "frameId": str(frame_id),
        }
        if hasattr(data, "timestamp"):
            frame_info["timestamp"] = data.timestamp

        self._synchronize_upload_info(
            target_remote_path, checksum, frame_info, skip_uploaded_files
        )
        self._upload_label(data)
def getUlid(ts):
    if not ts:
        ts = int(time.time())
    uid = ulid.from_timestamp(ts)
    return uid
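A short usage sketch (assuming `ulid-py`, whose `from_timestamp` accepts int or float epoch seconds; a falsy `ts` falls back to the current time):

uid = getUlid(1609459200)  # 2021-01-01T00:00:00Z
assert uid.timestamp().timestamp == 1609459200.0
print(uid.str)  # 26-character, lexicographically time-sortable ULID string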
def test__upload_segment(self, mocker):
    segment_test = FusionSegment(name="test1")
    ulids = []
    done_frames = []
    for i in range(5):
        temp_frame = Frame()
        temp_ulid = from_timestamp(10 * i + 10)
        temp_frame.frame_id = temp_ulid
        if i % 2 == 0:
            temp_frame["camera"] = Data(f"{i}.png")
            done_frames.append(temp_frame)
        else:
            temp_frame["lidar"] = Data(f"{i}.png")
        ulids.append(temp_ulid)
        segment_test.append(temp_frame)

    segment_client = FusionSegmentClient(name="test1", data_client=self.fusion_dataset_client)
    get_or_create_segment = mocker.patch(
        f"{dataset.__name__}.FusionDatasetClient.get_or_create_segment",
        return_value=segment_client,
    )
    list_frames = mocker.patch(
        f"{segment.__name__}.FusionSegmentClient.list_frames",
        return_value=done_frames,
    )
    multithread_upload = mocker.patch(f"{dataset.__name__}.multithread_upload")

    with Tqdm(5, disable=False) as pbar:
        self.fusion_dataset_client._upload_segment(
            segment_test, jobs=8, skip_uploaded_files=True, pbar=pbar
        )
    get_or_create_segment.assert_called_once_with(segment_test.name)
    list_frames.assert_called_once_with()
    args, keywords = multithread_upload.call_args
    for index, values in enumerate(args[1]):
        data, sensor_name, frame_id = values
        assert data.path == f"{index * 2 + 1}.png"
        assert sensor_name == "lidar"
        assert frame_id == ulids[index * 2 + 1].str
    assert keywords["callback"] == segment_client._synchronize_upload_info
    assert keywords["jobs"] == 8
    assert keywords["pbar"] == pbar
    multithread_upload.assert_called_once()

    with Tqdm(5, disable=False) as pbar:
        self.fusion_dataset_client._upload_segment(
            segment_test, jobs=8, skip_uploaded_files=False, pbar=pbar
        )
    get_or_create_segment.assert_called_with(segment_test.name)
    list_frames.assert_called_with()
    args, keywords = multithread_upload.call_args
    for index, values in enumerate(args[1]):
        data, sensor_name, frame_id = values
        assert data.path == f"{index}.png"
        if index % 2 == 0:
            assert sensor_name == "camera"
        else:
            assert sensor_name == "lidar"
        assert frame_id == ulids[index].str
    assert keywords["callback"] == segment_client._synchronize_upload_info
    assert keywords["jobs"] == 8
    assert keywords["pbar"] == pbar
def create_dataset_version(body: JsonObject) -> JsonObject:
    logger = set_up_logging(__name__)
    logger.debug(json.dumps({"event": body}))

    body_schema = {
        "type": "object",
        "properties": {
            DATASET_ID_SHORT_KEY: {"type": "string"},
            METADATA_URL_KEY: {"type": "string"},
            NOW_KEY: {"type": "string", "format": "date-time"},
        },
        "required": [DATASET_ID_SHORT_KEY, METADATA_URL_KEY],
    }

    # validate input
    try:
        validate(body, body_schema)
    except ValidationError as err:
        logger.warning(json.dumps({ERROR_KEY: err}, default=str))
        return error_response(HTTPStatus.BAD_REQUEST, err.message)

    datasets_model_class = datasets_model_with_meta()

    # validate dataset exists
    try:
        dataset = datasets_model_class.get(
            hash_key=f"{DATASET_ID_PREFIX}{body[DATASET_ID_SHORT_KEY]}",
            consistent_read=True,
        )
    except DoesNotExist as err:
        logger.warning(json.dumps({ERROR_KEY: err}, default=str))
        return error_response(
            HTTPStatus.NOT_FOUND,
            f"dataset '{body[DATASET_ID_SHORT_KEY]}' could not be found",
        )

    now = datetime.fromisoformat(body.get(NOW_KEY, datetime.utcnow().isoformat()))
    dataset_version_id = human_readable_ulid(from_timestamp(now))

    # execute step function
    step_functions_input = {
        DATASET_ID_KEY: dataset.dataset_id,
        DATASET_PREFIX_KEY: dataset.dataset_prefix,
        VERSION_ID_KEY: dataset_version_id,
        METADATA_URL_KEY: body[METADATA_URL_KEY],
    }
    state_machine_arn = get_param(
        ParameterName.PROCESSING_DATASET_VERSION_CREATION_STEP_FUNCTION_ARN
    )

    step_functions_response = STEP_FUNCTIONS_CLIENT.start_execution(
        stateMachineArn=state_machine_arn,
        name=dataset_version_id,
        input=json.dumps(step_functions_input),
    )

    logger.debug(json.dumps({"response": step_functions_response}, default=str))

    # return arn of executing process
    return success_response(
        HTTPStatus.CREATED,
        {
            VERSION_ID_KEY: dataset_version_id,
            EXECUTION_ARN_KEY: step_functions_response["executionArn"],
        },
    )
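A hedged request sketch (the key constants resolve to this module's actual field names; the values are hypothetical): `NOW_KEY` is optional and defaults to the current UTC time, and the resulting ULID becomes both the version id and the step function execution name.

body = {
    DATASET_ID_SHORT_KEY: "abc123",
    METADATA_URL_KEY: "s3://example-bucket/metadata.json",
    NOW_KEY: "2021-01-01T00:00:00+00:00",
}
response = create_dataset_version(body)  # returns VERSION_ID_KEY and EXECUTION_ARN_KEY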
def upload_frame(self, frame: Frame, timestamp: Optional[float] = None) -> None:
    """Upload frame to the draft.

    Arguments:
        frame: The :class:`~tensorbay.dataset.frame.Frame` to upload.
        timestamp: The mark to sort frames, supporting timestamp and float.

    Raises:
        GASPathError: When remote_path does not follow linux style.
        GASException: When uploading frame failed.
        TypeError: When the frame id conflicts with the given timestamp.

    """
    self._status.check_authority_for_draft()

    if timestamp is None:
        try:
            frame_id = frame.frame_id
        except AttributeError as error:
            raise TypeError(
                "Missing frame id, please set the frame id in the frame "
                "or pass a timestamp to the function!"
            ) from error
    elif hasattr(frame, "frame_id"):
        raise TypeError(
            "Frame id conflicts, please do not pass a timestamp to the function!"
        )
    else:
        frame_id = str(ulid.from_timestamp(timestamp))

    for sensor_name, data in frame.items():
        if not isinstance(data, Data):
            continue

        remote_path = data.target_remote_path
        if "\\" in remote_path:
            raise GASPathError(remote_path)

        permission = self._get_upload_permission()
        post_data = permission["result"]
        post_data["key"] = permission["extra"]["objectPrefix"] + remote_path

        try:
            version_id, etag = self._post_multipart_formdata(
                permission["extra"]["host"],
                data.path,
                remote_path,
                post_data,
            )
            frame_info: Dict[str, Any] = {
                "segmentName": self._name,
                "sensorName": sensor_name,
                "frameId": frame_id,
            }
            if hasattr(data, "timestamp"):
                frame_info["timestamp"] = data.timestamp
            self._synchronize_upload_info(post_data["key"], version_id, etag, frame_info)
        except GASException:
            self._clear_upload_permission()
            raise

        self._upload_label(data)