def split_video_annotation(annotation):
    """
    Splits a video annotation file into one ``dt.AnnotationFile`` per frame.

    Parameters
    ----------
    annotation
        The video annotation file to split. The function reads its ``is_video``,
        ``frame_urls``, ``annotations``, ``filename``, ``path``, ``image_width``,
        ``image_height``, ``workview_url`` and ``seq`` attributes.

    Returns
    -------
    list
        One ``dt.AnnotationFile`` per entry of ``annotation.frame_urls``, in the same order.

    Raises
    ------
    AttributeError
        If ``annotation.is_video`` is falsy.
    """
    if not annotation.is_video:
        raise AttributeError("this is not a video annotation")

    per_frame_files = []
    for frame_index, frame_url in enumerate(annotation.frame_urls):
        # Keep only the annotations that carry data for this frame.
        frame_items = []
        for video_item in annotation.annotations:
            if frame_index in video_item.frames:
                frame_items.append(video_item.frames[frame_index])

        frame_classes = {item.annotation_class for item in frame_items}

        # Frame files are named "<video stem>/<zero-padded frame index>.jpg".
        video_stem = Path(annotation.filename).stem
        frame_filename = f"{video_stem}/{frame_index:07d}.jpg"

        frame_file = dt.AnnotationFile(
            annotation.path,
            frame_filename,
            frame_classes,
            frame_items,
            False,
            annotation.image_width,
            annotation.image_height,
            frame_url,
            annotation.workview_url,
            annotation.seq,
        )
        per_frame_files.append(frame_file)

    return per_frame_files
def parse_json(path: Path, data: Dict[str, Any]) -> Iterator[dt.AnnotationFile]:
    """
    Yields one ``dt.AnnotationFile`` per image referenced by the given annotations.

    Parameters
    ----------
    path : Path
        Path of the file the data was read from; passed through to every yielded file.
    data : Dict[str, Any]
        The decoded JSON document. It must contain the ``annotations``, ``images`` and
        ``categories`` lists.

    Returns
    -------
    Iterator[dt.AnnotationFile]
        One file per distinct ``image_id`` found in ``data["annotations"]``, in order of
        first appearance.

    Raises
    ------
    KeyError
        If a required key is missing from ``data``, from an annotation or from an image.
    """
    raw_annotations = data["annotations"]
    images_by_id = {image["id"]: image for image in data["images"]}
    categories_by_id = {category["id"]: category for category in data["categories"]}

    grouped: Dict[str, Any] = {}
    for raw in raw_annotations:
        owner_id = raw["image_id"]
        # These lookups only verify that the keys exist (a missing key raises KeyError);
        # the values themselves are not used here.
        _ = raw["category_id"]
        _ = raw["segmentation"]
        grouped.setdefault(owner_id, []).append(parse_annotation(raw, categories_by_id))

    for owner_id, parsed_items in grouped.items():
        image = images_by_id[owner_id]
        # Drop falsy results returned by parse_annotation.
        kept = [item for item in parsed_items if item]
        classes = {item.annotation_class for item in kept}
        remote_path, filename = deconstruct_full_path(image["file_name"])
        yield dt.AnnotationFile(path, filename, classes, kept, remote_path=remote_path)
def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationFile]:
    """
    Splits a video annotation file into one ``dt.AnnotationFile`` per frame.

    Parameters
    ----------
    annotation : dt.AnnotationFile
        The video annotation file to split.

    Returns
    -------
    List[dt.AnnotationFile]
        One file per entry of ``annotation.frame_urls``, in the same order.

    Raises
    ------
    AttributeError
        If ``annotation.is_video`` is falsy, or if ``annotation.frame_urls`` is empty or None.
    """
    if not annotation.is_video:
        raise AttributeError("this is not a video annotation")
    if not annotation.frame_urls:
        raise AttributeError("This Annotation has no frame urls")

    per_frame_files = []
    for frame_index, frame_url in enumerate(annotation.frame_urls):
        # Only video annotations carry per-frame data; keep those covering this frame.
        frame_items = []
        for candidate in annotation.annotations:
            if isinstance(candidate, dt.VideoAnnotation) and frame_index in candidate.frames:
                frame_items.append(candidate.frames[frame_index])

        frame_classes: Set[dt.AnnotationClass] = {item.annotation_class for item in frame_items}

        # Frame files are named "<video stem>/<zero-padded frame index>.png".
        video_stem = Path(annotation.filename).stem
        frame_filename: str = f"{video_stem}/{frame_index:07d}.png"

        frame_file = dt.AnnotationFile(
            annotation.path,
            frame_filename,
            frame_classes,
            frame_items,
            False,
            annotation.image_width,
            annotation.image_height,
            frame_url,
            annotation.workview_url,
            annotation.seq,
        )
        per_frame_files.append(frame_file)

    return per_frame_files
def parse_path(path: Path) -> Optional[dt.AnnotationFile]:
    """
    Parses the given pascalvoc file and maybe returns the corresponding annotation.
    The file must have the following structure:

    ```xml
    <filename>SOME_FILE_NAME</filename>
    <object>
        <name>CLASS_NAME</name>
        <bndbox>
            <xmax>NUMBER</xmax>
            <xmin>NUMBER</xmin>
            <ymax>NUMBER</ymax>
            <ymin>NUMBER</ymin>
        </bndbox>
    </object>
    <object>
        ...
    </object>
    ```

    Parameters
    ----------
    path : Path
        The path of the file to parse.

    Returns
    -------
    Optional[darwin.datatypes.AnnotationFile]
        An AnnotationFile with the parsed information from the file, or None if the file
        is not an ``XML`` file (its suffix is not ``.xml``).

    Raises
    ------
    ValueError
        If a mandatory child element is missing or is empty. Mandatory child elements are:
        filename, name, bndbox, xmin, xmax, ymin and ymax.
    """
    if path.suffix != ".xml":
        return None

    root = ET.parse(str(path)).getroot()
    filename = _find_text_value(root, "filename")

    # Parse every <object> element and drop the ones the helper rejected (falsy results).
    annotations: List[dt.Annotation] = [
        parsed for parsed in map(_parse_annotation, root.findall("object")) if parsed
    ]
    classes = {parsed.annotation_class for parsed in annotations}

    return dt.AnnotationFile(path, filename, classes, annotations, remote_path="/")
def parse_file(path: Path) -> Optional[dt.AnnotationFile]:
    """
    Parses a JSON annotation file into a ``dt.AnnotationFile``.

    Parameters
    ----------
    path : Path
        The path of the file to parse.

    Returns
    -------
    Optional[dt.AnnotationFile]
        The parsed annotation file, or None if the suffix of ``path`` is not ``.json``.

    Raises
    ------
    KeyError
        If the decoded document has no ``annotations`` or ``filename`` key.
    """
    if path.suffix != ".json":
        return None

    with path.open() as handle:
        document = json.load(handle)

    # Drop falsy results returned by the per-annotation parser.
    parsed_items = [
        item for item in (_parse_annotation(raw) for raw in document["annotations"]) if item
    ]
    classes = {item.annotation_class for item in parsed_items}
    filename = _remove_leading_slash(document["filename"])

    return dt.AnnotationFile(path, filename, classes, parsed_items)
def parse_json(path, data):
    """
    Yields ``dt.AnnotationFile`` objects built from a decoded JSON document.

    Region annotations are grouped per image and yielded first. Image-level tags, read
    from the optional ``tag_categories`` list, are then yielded as separate files.

    Parameters
    ----------
    path
        Path of the file the data was read from; passed through to every yielded file.
    data
        The decoded JSON document. It must contain the ``annotations``, ``images`` and
        ``categories`` lists. ``tag_categories`` may be absent or None, in which case no
        tag files are produced.

    Returns
    -------
    Iterator
        First one file per distinct ``image_id`` found in ``data["annotations"]``, then
        one file per distinct ``image_id`` found in ``data["tag_categories"]``.

    Raises
    ------
    KeyError
        If a required key is missing from ``data``, from an annotation, from a tag or
        from the image lookup table.
    """
    annotations = data["annotations"]
    image_lookup_table = {image["id"]: image for image in data["images"]}
    category_lookup_table = {category["id"]: category for category in data["categories"]}

    image_annotations = {}
    for annotation in annotations:
        image_id = annotation["image_id"]
        # These lookups only verify that the keys exist (a missing key raises KeyError);
        # the values themselves are not used here.
        annotation["category_id"]
        annotation["segmentation"]
        image_annotations.setdefault(image_id, []).append(
            parse_annotation(annotation, category_lookup_table)
        )

    # "tag_categories" is optional: a document without tags must still parse.
    image_tags = {}
    for tag in data.get("tag_categories") or []:
        image_tags.setdefault(tag["image_id"], []).append(dt.make_tag(tag["name"]))

    for image_id, parsed in image_annotations.items():
        image = image_lookup_table[image_id]
        # Drop falsy results returned by parse_annotation.
        kept = [item for item in parsed if item]
        annotation_classes = {item.annotation_class for item in kept}
        yield dt.AnnotationFile(path, image["file_name"], annotation_classes, kept)

    for image_id, tags in image_tags.items():
        image = image_lookup_table[image_id]
        annotation_classes = {tag.annotation_class for tag in tags}
        yield dt.AnnotationFile(path, image["file_name"], annotation_classes, tags)
def parse_file(path: Path) -> Optional[dt.AnnotationFile]:
    """
    Parses an XML annotation file into a ``dt.AnnotationFile``.

    Parameters
    ----------
    path : Path
        The path of the file to parse.

    Returns
    -------
    Optional[dt.AnnotationFile]
        The parsed annotation file, or None if the suffix of ``path`` is not ``.xml``.

    Raises
    ------
    AttributeError
        If the document root has no ``filename`` child element.
    """
    if path.suffix != ".xml":
        return None

    root = ET.parse(path).getroot()
    filename = root.find("filename").text

    # Parse every <object> element and drop falsy results.
    parsed_items = [
        item for item in (_parse_annotation(node) for node in root.findall("object")) if item
    ]
    classes = {item.annotation_class for item in parsed_items}

    return dt.AnnotationFile(path, filename, classes, parsed_items)
def parse_darwin_image(path, data, count):
    """
    Builds a ``dt.AnnotationFile`` for an image from a decoded JSON document.

    Parameters
    ----------
    path
        Path of the file the data was read from.
    data
        The decoded JSON document. It must contain ``annotations`` and an ``image``
        mapping with ``width``, ``height`` and ``url`` keys.
    count
        Fallback sequence number, used when ``data["image"]`` has no ``seq`` key.

    Returns
    -------
    dt.AnnotationFile
        The annotation file describing the image.

    Raises
    ------
    KeyError
        If ``annotations``, ``image``, or the image's ``width``, ``height`` or ``url``
        is missing.
    """
    # Drop falsy results returned by the per-annotation parser.
    parsed_items = [
        item for item in (parse_darwin_annotation(raw) for raw in data["annotations"]) if item
    ]
    classes = {item.annotation_class for item in parsed_items}

    image = data["image"]
    return dt.AnnotationFile(
        path,
        get_local_filename(image),
        classes,
        parsed_items,
        False,
        image["width"],
        image["height"],
        image["url"],
        image.get("workview_url"),
        image.get("seq", count),
    )
def parse_path(path: Path) -> Optional[List[dt.AnnotationFile]]:
    """
    Parses a CSV file of video tags into one ``dt.AnnotationFile`` per filename.

    Each usable row has exactly four columns: ``filename, tag, start_frame, end_frame``.
    Surrounding whitespace is stripped from every column. Rows with a different number
    of columns, or with an empty filename, are skipped.

    Parameters
    ----------
    path : Path
        The path of the file to parse.

    Returns
    -------
    Optional[List[dt.AnnotationFile]]
        One video annotation file per distinct filename, in order of first appearance,
        or None if the suffix of ``path`` is not ``.csv``.

    Raises
    ------
    ValueError
        If ``start_frame`` or ``end_frame`` of a four-column row is not an integer.
    """
    if path.suffix != ".csv":
        return None

    file_annotation_map = {}
    # The csv module requires files to be opened with newline="" so that line endings
    # and newlines embedded in quoted fields are handled by the reader itself.
    with path.open(newline="") as f:
        for row in csv.reader(f):
            try:
                filename, tag, start_frame, end_frame = [cell.strip() for cell in row]
            except ValueError:
                # Wrong number of columns (including blank lines): not a tag row.
                continue
            if filename == "":
                continue

            first_frame = int(start_frame)
            last_frame = int(end_frame)
            frame_range = range(first_frame, last_frame + 1)

            tag_annotation = dt.make_tag(tag)
            # The same tag applies to every frame of the inclusive range; only the
            # first frame is flagged as a keyframe.
            frames = {index: tag_annotation for index in frame_range}
            keyframes = {index: index == first_frame for index in frame_range}
            video_annotation = dt.make_video_annotation(
                frames, keyframes, [[first_frame, last_frame]], False
            )
            file_annotation_map.setdefault(filename, []).append(video_annotation)

    files = []
    for filename, annotations in file_annotation_map.items():
        annotation_classes = {annotation.annotation_class for annotation in annotations}
        files.append(
            dt.AnnotationFile(
                path,
                filename,
                annotation_classes,
                annotations,
                is_video=True,
                remote_path="/",
            )
        )
    return files
def parse_file(path: Path) -> Optional[List[dt.AnnotationFile]]:
    """
    Parses a CSV file of image tags into one ``dt.AnnotationFile`` per row.

    Each row has the form ``filename, tag, tag, ...``. Surrounding whitespace is stripped
    from every column. Blank lines, rows with an empty filename and empty tag columns
    are skipped.

    Parameters
    ----------
    path : Path
        The path of the file to parse.

    Returns
    -------
    Optional[List[dt.AnnotationFile]]
        One annotation file per usable row, in file order, or None if the suffix of
        ``path`` is not ``.csv``.
    """
    if path.suffix != ".csv":
        return None

    files = []
    # The csv module requires files to be opened with newline="" so that line endings
    # and newlines embedded in quoted fields are handled by the reader itself.
    with path.open(newline="") as f:
        for row in csv.reader(f):
            # csv.reader yields an empty list for a blank line; unpacking it below
            # would raise ValueError, so skip it like any other empty entry.
            if not row:
                continue
            filename, *tags = [cell.strip() for cell in row]
            if filename == "":
                continue
            annotations = [dt.make_tag(tag) for tag in tags if tag]
            annotation_classes = {annotation.annotation_class for annotation in annotations}
            files.append(dt.AnnotationFile(path, filename, annotation_classes, annotations))
    return files
def parse_darwin_video(path: Path, data: Dict[str, Any], count: Optional[int]) -> dt.AnnotationFile:
    """
    Parses the given JSON file in v7's darwin proprietary format. Works for playback videos.

    Parameters
    ----------
    path : Path
        Path to the file to parse.
    data : Dict[str, Any]
        The decoded JSON file in Python format.
    count : Optional[int]
        Fallback sequence number, used when ``data["image"]`` has no ``seq`` key.

    Returns
    -------
    dt.AnnotationFile
        An AnnotationFile with the information from the parsed JSON file.

    Raises
    ------
    OutdatedDarwinJSONFormat
        If ``data["image"]`` has no ``width`` or no ``height`` key.
    """
    # Drop falsy results returned by the per-annotation parser.
    video_annotations: List[dt.VideoAnnotation] = [
        item
        for item in (parse_darwin_video_annotation(raw) for raw in data["annotations"])
        if item
    ]
    classes: Set[dt.AnnotationClass] = {item.annotation_class for item in video_annotations}

    image = data["image"]
    if not ("width" in image and "height" in image):
        raise OutdatedDarwinJSONFormat(
            "Missing width/height in video, please re-export")

    return dt.AnnotationFile(
        path,
        get_local_filename(image),
        classes,
        video_annotations,
        True,
        image.get("width"),
        image.get("height"),
        image.get("url"),
        image.get("workview_url"),
        image.get("seq", count),
        image.get("frame_urls"),
        image.get("path", "/"),
    )
def parse_darwin_video(path, data, count):
    """
    Builds a ``dt.AnnotationFile`` for a video from a decoded JSON document.

    Parameters
    ----------
    path
        Path of the file the data was read from.
    data
        The decoded JSON document. It must contain ``annotations`` and an ``image``
        mapping with ``width``, ``height``, ``url`` and ``frame_urls`` keys.
    count
        Fallback sequence number, used when ``data["image"]`` has no ``seq`` key.

    Returns
    -------
    dt.AnnotationFile
        The annotation file describing the video.

    Raises
    ------
    OutdatedDarwinJSONFormat
        If ``data["image"]`` has no ``width`` or no ``height`` key.
    KeyError
        If ``annotations``, ``image``, or the image's ``url`` or ``frame_urls`` is missing.
    """
    # Drop falsy results returned by the per-annotation parser.
    video_annotations = [
        item
        for item in (parse_darwin_video_annotation(raw) for raw in data["annotations"])
        if item
    ]
    classes = {item.annotation_class for item in video_annotations}

    image = data["image"]
    if not ("width" in image and "height" in image):
        raise OutdatedDarwinJSONFormat("Missing width/height in video, please re-export")

    return dt.AnnotationFile(
        path,
        get_local_filename(image),
        classes,
        video_annotations,
        True,
        image["width"],
        image["height"],
        image["url"],
        image.get("workview_url"),
        image.get("seq", count),
        image["frame_urls"],
    )
def parse_darwin_image(path: Path, data: Dict[str, Any], count: Optional[int]) -> dt.AnnotationFile:
    """
    Parses the given JSON file in v7's darwin proprietary format. Works only for images.

    Parameters
    ----------
    path : Path
        Path to the file to parse.
    data : Dict[str, Any]
        The decoded JSON file in Python format.
    count : Optional[int]
        Fallback sequence number, used when ``data["image"]`` has no ``seq`` key.

    Returns
    -------
    dt.AnnotationFile
        An AnnotationFile with the information from the parsed JSON file.
    """
    # Drop falsy results returned by the per-annotation parser.
    image_annotations: List[dt.Annotation] = [
        item for item in (parse_darwin_annotation(raw) for raw in data["annotations"]) if item
    ]
    classes: Set[dt.AnnotationClass] = {item.annotation_class for item in image_annotations}

    image = data["image"]
    return dt.AnnotationFile(
        path,
        get_local_filename(image),
        classes,
        image_annotations,
        False,
        image.get("width"),
        image.get("height"),
        image.get("url"),
        image.get("workview_url"),
        image.get("seq", count),
        None,
        image.get("path", "/"),
    )
def annotation_file() -> dt.AnnotationFile:
    """
    Builds a minimal ``dt.AnnotationFile`` named ``test.json``.

    Returns
    -------
    dt.AnnotationFile
        A file whose path and filename are both ``test.json``, with no annotation
        classes and no annotations.
    """
    name = "test.json"
    return dt.AnnotationFile(
        path=Path(name),
        filename=name,
        annotation_classes=set(),
        annotations=[],
    )