Пример #1
0
    def read_headers(
        cls,
        file: format.filehandle.FileHandle,
        video_search: Union[Callable, List[Text], None] = None,
        match_to: Optional[Labels] = None,
    ):
        """Read the Labels metadata headers from an open SLEAP HDF5 file.

        Args:
            file: Handle wrapping the open HDF5 labels file.
            video_search: Either a callable used to fix broken video paths,
                a list of search-path strings, a single search-path string,
                or None to skip searching.
            match_to: If given, matching objects from this :class:`Labels`
                are reused when constructing the result.

        Returns:
            A :class:`Labels` object built from the header metadata only.
        """
        h5_file = file.file

        # The primary metadata blob is a JSON string stored as an HDF5
        # attribute on the "metadata" group.
        dicts = json_loads(
            h5_file.require_group("metadata").attrs["json"].tostring().decode()
        )

        # Oversized collections were split out of the metadata group into
        # per-item JSON datasets; merge them back into the metadata dict.
        for key in ("videos", "tracks", "suggestions"):
            hdf5_key = f"{key}_json"
            if hdf5_key in h5_file:
                dicts[key] = [json_loads(item_json) for item_json in h5_file[hdf5_key]]

        # A backend filename of "." marks a video embedded in this same file;
        # rewrite it to the actual path of the labels file.
        for video_item in dicts["videos"]:
            if video_item["backend"]["filename"] == ".":
                video_item["backend"]["filename"] = file.filename

        # Normalize video_search into a callable:

        # 1. A bare string becomes a one-element list of search paths.
        if isinstance(video_search, str):
            video_search = [video_search]

        # 2. Any iterable is treated as a list of search-path strings and
        #    turned into a non-GUI callback.
        if hasattr(video_search, "__iter__"):
            search_paths = list(video_search)
            video_search = Labels.make_video_callback(search_paths)

        # 3. Invoke the callback (passed directly or built above) so it can
        #    fix up broken video paths in place.
        if callable(video_search):
            video_search(dicts["videos"])

        # Build the Labels object from the header data we've loaded.
        return labels_json.LabelsJsonAdaptor.from_json_data(dicts, match_to=match_to)
Пример #2
0
    def _load(self):
        """Lazily open the HDF5 file and dataset backing this video.

        Idempotent: the first call records that a load was attempted, and
        subsequent calls return immediately (failed loads are not retried).
        """
        if self.__tried_to_load:
            return

        self.__tried_to_load = True

        # Handle cases where the user feeds in an h5.File object instead of
        # a filename.
        if isinstance(self.filename, h5.File):
            self.__file_h5 = self.filename
            self.filename = self.__file_h5.filename
        elif isinstance(self.filename, str):
            try:
                self.__file_h5 = h5.File(self.filename, "r")
            except OSError as ex:
                raise FileNotFoundError(
                    f"Could not find HDF5 file {self.filename}"
                ) from ex
        else:
            self.__file_h5 = None

        # Handle the case when an h5.Dataset is passed in: derive the file
        # and dataset name from it.
        if isinstance(self.dataset, h5.Dataset):
            self.__dataset_h5 = self.dataset
            self.__file_h5 = self.__dataset_h5.file
            self.dataset = self.__dataset_h5.name

        # File loaded and dataset name given, so load dataset by name.
        elif isinstance(self.dataset, str) and (self.__file_h5 is not None):
            # dataset = "video0" passed:
            if self.dataset + "/video" in self.__file_h5:
                self.__dataset_h5 = self.__file_h5[self.dataset + "/video"]
                base_dataset_path = self.dataset
            else:
                # dataset = "video0/video" passed:
                self.__dataset_h5 = self.__file_h5[self.dataset]
                base_dataset_path = "/".join(self.dataset.split("/")[:-1])

            # Check for a frame_numbers dataset corresponding to the video.
            framenum_dataset = f"{base_dataset_path}/frame_numbers"
            if framenum_dataset in self.__file_h5:
                original_idx_lists = self.__file_h5[framenum_dataset]
                # Create a map from frame idx in the original video to frame
                # idx in the current (subsampled/embedded) one.
                for current_idx, original_idx in enumerate(original_idx_lists):
                    self.__original_to_current_frame_idx[original_idx] = current_idx

            # Restore source video metadata, if stored alongside the dataset.
            # NOTE(review): the group's "json" attribute is assumed to be a
            # serialized Video dict — confirm against the writer code.
            source_video_group = f"{base_dataset_path}/source_video"
            if source_video_group in self.__file_h5:
                d = json_loads(
                    self.__file_h5.require_group(source_video_group).attrs["json"]
                )

                self._source_video = Video.cattr().structure(d, Video)
Пример #3
0
    def read(
        cls,
        file: FileHandle,
        video_search: Union[Callable, List[Text], None] = None,
        match_to: Optional[Labels] = None,
        *args,
        **kwargs,
    ) -> Labels:
        """
        Deserialize JSON file as new :class:`Labels` instance.

        Args:
            file: Handle for the JSON file (or a zip archive containing one)
                to deserialize.
            video_search: A callback function which can modify video paths
                before we try to create the corresponding :class:`Video`
                objects. Usually you'll want to pass a callback created by
                :meth:`make_video_callback` or
                :meth:`make_gui_video_callback`.
                Alternately, if you pass a list of strings we'll construct a
                non-gui callback with those strings as the search paths.
            match_to: If given, we'll replace particular objects in the
                data dictionary with *matching* objects in the match_to
                :class:`Labels` object. This ensures that the newly
                instantiated :class:`Labels` can be merged without
                duplicate matching objects (e.g., :class:`Video` objects).

        Returns:
            A new :class:`Labels` object.
        """
        tmp_dir = None
        filename = file.filename

        # Check if the file is a zipfile or not.
        if zipfile.is_zipfile(filename):

            # Make a tmpdir, located in the directory that the file exists,
            # to unzip its contents into.
            tmp_dir = os.path.join(
                os.path.dirname(filename),
                f"tmp_{os.getpid()}_{os.path.basename(filename)}",
            )
            if os.path.exists(tmp_dir):
                shutil.rmtree(tmp_dir, ignore_errors=True)
            try:
                os.mkdir(tmp_dir)
            except FileExistsError:
                pass

            try:
                # Register a cleanup routine that deletes the tmpdir on
                # program exit if something goes wrong. The True is for
                # ignore_errors.
                atexit.register(shutil.rmtree, tmp_dir, True)

                # Uncompress the data into the directory.
                shutil.unpack_archive(filename, extract_dir=tmp_dir)

                # Replace filename with the first JSON file we find in the
                # archive.
                json_files = [
                    os.path.join(tmp_dir, name)
                    for name in os.listdir(tmp_dir)
                    if name.endswith(".json")
                ]

                if len(json_files) == 0:
                    raise ValueError(
                        f"No JSON file found inside {filename}. "
                        "Are you sure this is a valid sLEAP dataset?"
                    )

                filename = json_files[0]

            except Exception:
                # If we had problems, delete the temp directory and reraise
                # the exception.
                shutil.rmtree(tmp_dir, ignore_errors=True)
                raise

        # Open and parse the JSON in filename.
        with open(filename, "r") as json_file:

            # FIXME: Peek into the json to see if there is version string.
            # We do this to tell apart old JSON data from leap_dev vs the
            # newer format for sLEAP.
            json_str = json_file.read()
            dicts = json_loads(json_str)

            # If we have a version number, then it is new sLEAP format.
            if "version" in dicts:

                # Cache the working directory.
                cwd = os.getcwd()

                # Replace local video paths (for imagestore).
                if tmp_dir:
                    for vid in dicts["videos"]:
                        vid["backend"]["filename"] = os.path.join(
                            tmp_dir, vid["backend"]["filename"])

                # Use the video_search callback for finding videos with
                # broken paths:

                # 1. Accept single string as video search path.
                if isinstance(video_search, str):
                    video_search = [video_search]

                # 2. Accept list of strings as video search paths.
                if hasattr(video_search, "__iter__"):
                    # If the callback is an iterable, then we'll expect it to
                    # be a list of strings and build a non-gui callback with
                    # those as the search paths.
                    # When path is to a file, use the path of parent directory.
                    search_paths = [
                        os.path.dirname(path) if os.path.isfile(path) else path
                        for path in video_search
                    ]

                    # Make the search function from list of paths.
                    video_search = Labels.make_video_callback(search_paths)

                # 3. Use the callback function (either given as arg or built
                # from paths).
                if callable(video_search):
                    abort = video_search(dicts["videos"])
                    if abort:
                        raise FileNotFoundError(
                            "Video search was aborted; video files could not "
                            "be located."
                        )

                # Try to load the labels.
                try:
                    labels = cls.from_json_data(dicts, match_to=match_to)

                except FileNotFoundError:

                    # FIXME: The labels JSON has references to video files.
                    # Change directory to the dirname of the json file so
                    # that relative paths will be resolved from this
                    # directory. Maybe it is better to feed the dataset
                    # dirname all the way down to the Video object, but this
                    # keeps less coupling between classes.
                    if os.path.dirname(filename) != "":
                        os.chdir(os.path.dirname(filename))

                    # Try again.
                    labels = cls.from_json_data(dicts, match_to=match_to)

                finally:
                    # Make sure to change back even if we had problems.
                    os.chdir(cwd)

                return labels

            else:
                frames = load_labels_json_old(data_path=filename,
                                              parsed_json=dicts)
                return Labels(frames)
Пример #4
0
    def from_json_data(cls,
                       data: Union[str, dict],
                       match_to: Optional["Labels"] = None) -> "Labels":
        """
        Create instance of class from data in dictionary.

        Method is used by other methods that load from JSON.

        Args:
            data: Dictionary deserialized from JSON, or a raw JSON string.
            match_to: If given, we'll replace particular objects in the
                data dictionary with *matching* objects in the match_to
                :class:`Labels` object. This ensures that the newly
                instantiated :class:`Labels` can be merged without
                duplicate matching objects (e.g., :class:`Video` objects).

        Returns:
            A new :class:`Labels` object.
        """

        # Parse the json string if needed.
        if isinstance(data, str):
            dicts = json_loads(data)
        else:
            dicts = data

        # Don't break if json doesn't include tracks.
        dicts["tracks"] = dicts.get("tracks", [])

        # First, deserialize the skeletons, videos, and nodes lists.
        # The labels reference these so we will need them while deserializing.
        nodes = cattr.structure(dicts["nodes"], List[Node])

        idx_to_node = {i: node for i, node in enumerate(nodes)}
        skeletons = Skeleton.make_cattr(idx_to_node).structure(
            dicts["skeletons"], List[Skeleton])
        videos = Video.cattr().structure(dicts["videos"], List[Video])

        try:
            # First try unstructuring tuple (newer format).
            track_cattr = cattr.Converter(
                unstruct_strat=cattr.UnstructureStrategy.AS_TUPLE)
            tracks = track_cattr.structure(dicts["tracks"], List[Track])
        except Exception:
            # Then try unstructuring dict (older format).
            try:
                tracks = cattr.structure(dicts["tracks"], List[Track])
            except Exception as ex:
                # Chain the original error so the root cause stays visible.
                raise ValueError(
                    "Unable to load tracks as tuple or dict!") from ex

        # If we're given a Labels object to match, use its objects when they
        # match.
        if match_to is not None:
            for idx, sk in enumerate(skeletons):
                for old_sk in match_to.skeletons:
                    if sk.matches(old_sk):
                        # Use nodes from matched skeleton.
                        for (node, match_node) in zip(sk.nodes, old_sk.nodes):
                            node_idx = nodes.index(node)
                            nodes[node_idx] = match_node
                        # Use skeleton from match.
                        skeletons[idx] = old_sk
                        break
            for idx, vid in enumerate(videos):
                for old_vid in match_to.videos:

                    # Try to match videos using either their current or
                    # source filename if available.
                    old_vid_paths = [old_vid.filename]
                    if getattr(old_vid.backend, "has_embedded_images", False):
                        old_vid_paths.append(
                            old_vid.backend._source_video.filename)

                    new_vid_paths = [vid.filename]
                    if getattr(vid.backend, "has_embedded_images", False):
                        new_vid_paths.append(
                            vid.backend._source_video.filename)

                    is_match = False
                    for old_vid_path in old_vid_paths:
                        for new_vid_path in new_vid_paths:
                            if old_vid_path == new_vid_path or weak_filename_match(
                                    old_vid_path, new_vid_path):
                                is_match = True
                                videos[idx] = old_vid
                                break
                        if is_match:
                            break
                    if is_match:
                        break

        suggestions = []
        if "suggestions" in dicts:
            suggestions_cattr = cattr.Converter()
            suggestions_cattr.register_structure_hook(
                Video, lambda x, type: videos[int(x)])
            try:
                suggestions = suggestions_cattr.structure(
                    dicts["suggestions"], List[SuggestionFrame])
            except Exception as e:
                print("Error while loading suggestions (1)")
                print(e)

                try:
                    # Convert old suggestion format to new format.
                    # Old format: {video: list of frame indices}
                    # New format: [SuggestionFrames]
                    old_suggestions = suggestions_cattr.structure(
                        dicts["suggestions"], Dict[Video, List])
                    for video in old_suggestions.keys():
                        suggestions.extend([
                            SuggestionFrame(video, idx)
                            for idx in old_suggestions[video]
                        ])
                except Exception as e:
                    # Best-effort: suggestions are non-critical, so log and
                    # continue with whatever was loaded.
                    print("Error while loading suggestions (2)")
                    print(e)

        if "negative_anchors" in dicts:
            negative_anchors_cattr = cattr.Converter()
            negative_anchors_cattr.register_structure_hook(
                Video, lambda x, type: videos[int(x)])
            negative_anchors = negative_anchors_cattr.structure(
                dicts["negative_anchors"], Dict[Video, List])
        else:
            negative_anchors = dict()

        provenance = dicts.get("provenance", dict())

        # If there is actual labels data, get it.
        if "labels" in dicts:
            label_cattr = make_instance_cattr()
            label_cattr.register_structure_hook(
                Skeleton, lambda x, type: skeletons[int(x)])
            label_cattr.register_structure_hook(Video,
                                                lambda x, type: videos[int(x)])
            label_cattr.register_structure_hook(
                Node, lambda x, type: x
                if isinstance(x, Node) else nodes[int(x)])
            label_cattr.register_structure_hook(
                Track, lambda x, type: None if x is None else tracks[int(x)])

            labels = label_cattr.structure(dicts["labels"], List[LabeledFrame])
        else:
            labels = []

        return Labels(
            labeled_frames=labels,
            videos=videos,
            skeletons=skeletons,
            nodes=nodes,
            suggestions=suggestions,
            negative_anchors=negative_anchors,
            tracks=tracks,
            provenance=provenance,
        )
Пример #5
0
 def json(self):
     """The loaded JSON dictionary (for a JSON file), parsed lazily."""
     cached = self._json
     if cached is None:
         cached = json_loads(self.text)
         self._json = cached
     return cached
Пример #6
0
 def json(self):
     """Return the parsed JSON dictionary, computing and caching it on first access."""
     if self._json is not None:
         return self._json
     self._json = json_loads(self.text)
     return self._json
Пример #7
0
def load_labels_json_old(
    data_path: str,
    parsed_json: Optional[dict] = None,
    adjust_matlab_indexing: bool = True,
    fix_rel_paths: bool = True,
) -> List[LabeledFrame]:
    """
    Load predicted instances from Talmo's old JSON format.

    Args:
        data_path: The path to the JSON file.
        parsed_json: The parsed json if already loaded, so we can save
            some time if already parsed.
        adjust_matlab_indexing: Whether to adjust indexing from MATLAB
            (1-based) to Python (0-based).
        fix_rel_paths: Whether to fix paths to videos to absolute paths.

    Returns:
        A list of newly constructed :class:`LabeledFrame` objects.
    """
    if parsed_json is None:
        # Use a context manager so the file handle is always closed.
        with open(data_path) as f:
            data = json_loads(f.read())
    else:
        data = parsed_json

    videos = pd.DataFrame(data["videos"])
    instances = pd.DataFrame(data["instances"])
    points = pd.DataFrame(data["points"])
    predicted_instances = pd.DataFrame(data["predicted_instances"])
    predicted_points = pd.DataFrame(data["predicted_points"])

    if adjust_matlab_indexing:
        # MATLAB uses 1-based indexing; shift frame, node, and coordinate
        # values down by one.
        instances.frameIdx -= 1
        points.frameIdx -= 1
        predicted_instances.frameIdx -= 1
        predicted_points.frameIdx -= 1

        points.node -= 1
        predicted_points.node -= 1

        points.x -= 1
        predicted_points.x -= 1

        points.y -= 1
        predicted_points.y -= 1

    # Build the skeleton from node names and (possibly reindexed) edges.
    skeleton = Skeleton()
    skeleton.add_nodes(data["skeleton"]["nodeNames"])
    edges = data["skeleton"]["edges"]
    if adjust_matlab_indexing:
        edges = np.array(edges) - 1
    for (src_idx, dst_idx) in edges:
        skeleton.add_edge(
            data["skeleton"]["nodeNames"][src_idx],
            data["skeleton"]["nodeNames"][dst_idx],
        )

    if fix_rel_paths:
        # Resolve video paths relative to the JSON file when the stored path
        # doesn't exist as-is.
        for i, row in videos.iterrows():
            p = row.filepath
            if not os.path.exists(p):
                p = os.path.join(os.path.dirname(data_path), p)
                if os.path.exists(p):
                    videos.at[i, "filepath"] = p

    # Make the video objects.
    video_objects = {}
    for i, row in videos.iterrows():
        if videos.at[i, "format"] == "media":
            vid = Video.from_media(videos.at[i, "filepath"])
        else:
            vid = Video.from_hdf5(
                filename=videos.at[i, "filepath"], dataset=videos.at[i, "dataset"]
            )

        video_objects[videos.at[i, "id"]] = vid

    def get_frame_instances(video_id, frame_idx):
        """Return all Instance objects for one (video, frame) pair."""
        is_in_frame = (points["videoId"] == video_id) & (
            points["frameIdx"] == frame_idx
        )
        if not is_in_frame.any():
            return []

        instances = []
        frame_instance_ids = np.unique(points["instanceId"][is_in_frame])
        for instance_id in frame_instance_ids:
            is_instance = is_in_frame & (points["instanceId"] == instance_id)
            instance_points = {
                data["skeleton"]["nodeNames"][n]: Point(x, y, visible=v)
                for x, y, n, v in zip(
                    *[points[k][is_instance] for k in ["x", "y", "node", "visible"]]
                )
            }

            instance = Instance(skeleton=skeleton, points=instance_points)
            instances.append(instance)

        return instances

    # Get the unique labeled frames and construct a list of LabeledFrame
    # objects for them, in sorted (video, frame) order.
    frame_keys = sorted(
        {
            (videoId, frameIdx)
            for videoId, frameIdx in zip(points["videoId"], points["frameIdx"])
        }
    )
    labels = []
    for videoId, frameIdx in frame_keys:
        label = LabeledFrame(
            video=video_objects[videoId],
            frame_idx=frameIdx,
            instances=get_frame_instances(videoId, frameIdx),
        )
        labels.append(label)

    return labels