Example #1
    def load_tracklets_from_hdf(self, filename):
        self.filename = filename
        df = pd.read_hdf(filename)

        # Fill existing gaps
        data = df.to_numpy()
        mask = ~df.columns.get_level_values(
            level="coords").str.contains("likelihood")
        xy = data[:, mask]
        prob = data[:, ~mask]
        missing = np.isnan(xy)
        xy_filled = columnwise_spline_interp(xy, self.max_gap)
        filled = ~np.isnan(xy_filled)
        xy[filled] = xy_filled[filled]
        inds = np.argwhere(missing & filled)
        if inds.size:
            # Map the filled (x, y) column indices back to their likelihood columns
            inds[:, 1] //= 2
            inds = np.unique(inds, axis=0)
            prob[inds[:, 0], inds[:, 1]] = 0.01
        data[:, mask] = xy
        data[:, ~mask] = prob
        df = pd.DataFrame(data, index=df.index, columns=df.columns)

        idx = df.columns
        self.scorer = idx.get_level_values("scorer").unique().to_list()
        self.bodyparts = idx.get_level_values("bodyparts")
        self.nframes = len(df)
        self.times = np.arange(self.nframes)
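        # self.data will have shape (n_tracklets, n_frames, 3): one
        # (x, y, likelihood) row per (individual, bodypart) column triple.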
        self.data = df.values.reshape((self.nframes, -1, 3)).swapaxes(0, 1)
        self.xy = self.data[:, :, :2]
        self.prob = self.data[:, :, 2]
        individuals = idx.get_level_values("individuals")
        self.individuals = individuals.unique().to_list()
        self.tracklet2id = individuals.map(
            dict(zip(self.individuals,
                     range(len(self.individuals))))).tolist()[::3]
        bodyparts = self.bodyparts.unique()
        self.tracklet2bp = self.bodyparts.map(
            dict(zip(bodyparts, range(len(bodyparts))))).tolist()[::3]
        self._label_pairs = list(idx.droplevel(["scorer", "coords"]).unique())
        self._xy = self.xy.copy()
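
All three examples delegate gap filling to a columnwise_spline_interp helper that is not shown here. The sketch below is one way such a helper could work, assuming it interpolates the NaN runs of each column with a cubic spline and only bridges interior gaps of at most max_gap frames; the treatment of a non-positive max_gap and the choice of spline are assumptions, not the library's actual implementation.

import numpy as np
from scipy.interpolate import CubicSpline


def columnwise_spline_interp_sketch(data, max_gap=0):
    """Return a copy of `data` with short interior NaN gaps filled column-wise."""
    out = np.array(data, dtype=float, copy=True)
    x = np.arange(out.shape[0])
    for col in range(out.shape[1]):
        y = out[:, col]                      # view; filling y fills `out`
        valid = ~np.isnan(y)
        if valid.sum() < 4:
            continue                         # too few points for a cubic spline
        missing = np.flatnonzero(~valid)
        if not missing.size:
            continue
        spline = CubicSpline(x[valid], y[valid])
        # Split the missing indices into runs of consecutive frames.
        gaps = np.split(missing, np.flatnonzero(np.diff(missing) > 1) + 1)
        for gap in gaps:
            interior = gap[0] > 0 and gap[-1] < len(y) - 1
            short_enough = max_gap <= 0 or len(gap) <= max_gap   # assume 0 means "no limit"
            if interior and short_enough:
                y[gap] = spline(gap)
    return out
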
Example #2
    def _load_tracklets(self, tracklets, auto_fill):
        header = tracklets.pop("header")
        self.scorer = header.get_level_values("scorer").unique().to_list()
        bodyparts = header.get_level_values("bodyparts")
        bodyparts_multi = [
            bp for bp in self.cfg["multianimalbodyparts"] if bp in bodyparts
        ]
        bodyparts_single = self.cfg["uniquebodyparts"]
        mask_multi = bodyparts.isin(bodyparts_multi)
        mask_single = bodyparts.isin(bodyparts_single)
        self.bodyparts = list(
            bodyparts[mask_multi]) * self.nindividuals + list(
                bodyparts[mask_single])

        # Sort tracklets by length to prioritize greater continuity
        temp = sorted(tracklets.values(), key=len)
        if not len(temp):
            raise IOError("Tracklets are empty.")

        get_frame_ind = lambda s: int(re.findall(r"\d+", s)[0])
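        # e.g. get_frame_ind("frame042") == 42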

        # Drop tracklets that are too short
        tracklets_sorted = []
        last_frames = []
        for tracklet in temp:
            last_frames.append(get_frame_ind(list(tracklet)[-1]))
            if len(tracklet) > self.min_tracklet_len:
                tracklets_sorted.append(tracklet)
        self.nframes = max(last_frames) + 1
        self.times = np.arange(self.nframes)

        if auto_fill:  # Iteratively fill the data containers
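            # One (x, y, likelihood) triplet per bodypart and frame:
            # tracklets_multi is (n_individuals, n_frames, 3 * n_multi_bodyparts),
            # tracklets_single is (n_frames, 3 * n_single_bodyparts).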
            tracklets_multi = np.full(
                (self.nindividuals, self.nframes, len(bodyparts_multi) * 3),
                np.nan,
                np.float16,
            )
            tracklets_single = np.full(
                (self.nframes, len(bodyparts_single) * 3), np.nan, np.float16)
            for _ in trange(len(tracklets_sorted)):
                tracklet = tracklets_sorted.pop()
                inds, temp = zip(*[(get_frame_ind(k), v)
                                   for k, v in tracklet.items()])
                inds = np.asarray(inds)
                data = np.asarray(temp, dtype=np.float16)
                data_single = data[:, mask_single]
                is_multi = np.isnan(data_single).all()
                if not is_multi:
                    # Where slots are available, copy the data over
                    is_free = np.isnan(tracklets_single[inds])
                    has_data = ~np.isnan(data_single)
                    mask = has_data & is_free
                    rows, cols = np.nonzero(mask)
                    tracklets_single[inds[rows], cols] = data_single[mask]
                    # If about to overwrite data, keep tracklets with highest confidence
                    overwrite = has_data & ~is_free
                    if overwrite.any():
                        rows, cols = np.nonzero(overwrite)
                        more_confident = (data_single[overwrite] >
                                          tracklets_single[inds[rows],
                                                           cols])[2::3]
                        idx = np.flatnonzero(more_confident)
                        for i in idx:
                            sl = slice(i * 3, i * 3 + 3)
                            tracklets_single[inds[rows[sl]],
                                             cols[sl]] = data_single[rows[sl],
                                                                     cols[sl]]
                else:
                    is_free = np.isnan(tracklets_multi[:, inds])
                    data_multi = data[:, mask_multi]
                    has_data = ~np.isnan(data_multi)
                    overwrite = has_data & ~is_free
                    overwrite_risk = np.any(overwrite, axis=(1, 2))
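                    # overwrite_risk[i] is True if assigning this tracklet to
                    # individual i would clobber data already stored there.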
                    if overwrite_risk.all():
                        # Squeeze some data into empty slots
                        n_empty = is_free.all(axis=2).sum(axis=1)
                        for ind in np.argsort(n_empty)[::-1]:
                            mask = has_data & is_free
                            current_mask = mask[ind]
                            rows, cols = np.nonzero(current_mask)
                            if rows.size:
                                tracklets_multi[
                                    ind, inds[rows],
                                    cols] = data_multi[current_mask]
                                is_free[ind, current_mask] = False
                                has_data[current_mask] = False
                        if has_data.any():
                            # For the remaining data, overwrite where we are least confident
                            remaining = data_multi[has_data].reshape((-1, 3))
                            mask3d = np.broadcast_to(has_data,
                                                     (self.nindividuals, ) +
                                                     has_data.shape)
                            dims, rows, cols = np.nonzero(mask3d)
                            temp = tracklets_multi[dims, inds[rows],
                                                   cols].reshape(
                                                       (self.nindividuals, -1,
                                                        3))
                            diff = remaining - temp
                            # Find keypoints closest to the remaining data
                            # Use Manhattan distance to avoid overflow
                            dist = np.abs(diff[:, :, 0]) + np.abs(diff[:, :,
                                                                       1])
                            closest = np.argmin(dist, axis=0)
                            # Only overwrite if improving confidence
                            prob = diff[closest, range(len(closest)), 2]
                            better = np.flatnonzero(prob > 0)
                            idx = closest[better]
                            rows, cols = np.nonzero(has_data)
                            for i, j in zip(idx, better):
                                sl = slice(j * 3, j * 3 + 3)
                                tracklets_multi[i, inds[rows[sl]],
                                                cols[sl]] = remaining.flat[sl]
                    else:
                        rows, cols = np.nonzero(has_data)
                        n = np.argmin(overwrite_risk)
                        tracklets_multi[n, inds[rows],
                                        cols] = data_multi[has_data]

            multi = tracklets_multi.swapaxes(0, 1).reshape((self.nframes, -1))
            data = np.c_[multi, tracklets_single].reshape(
                (self.nframes, -1, 3))
            xy = data[:, :, :2].reshape((self.nframes, -1))
            prob = data[:, :, 2].reshape((self.nframes, -1))

            # Fill existing gaps
            missing = np.isnan(xy)
            xy_filled = columnwise_spline_interp(xy, self.max_gap)
            filled = ~np.isnan(xy_filled)
            xy[filled] = xy_filled[filled]
            inds = np.argwhere(missing & filled)
            if inds.size:
                # Map the filled (x, y) column indices back to their likelihood columns
                inds[:, 1] //= 2
                inds = np.unique(inds, axis=0)
                prob[inds[:, 0], inds[:, 1]] = 0.01
            data[:, :, :2] = xy.reshape((self.nframes, -1, 2))
            data[:, :, 2] = prob
            self.data = data.swapaxes(0, 1)
            self.xy = self.data[:, :, :2]
            self.prob = self.data[:, :, 2]

            # Map each tracklet index to the animal ID it belongs to
            # and to the bodypart index it corresponds to.
            self.individuals = self.cfg["individuals"] + (["single"] if len(
                self.cfg["uniquebodyparts"]) else [])
            self.tracklet2id = [
                i for i in range(0, self.nindividuals) for _ in bodyparts_multi
            ] + [self.nindividuals] * len(bodyparts_single)
            bps = bodyparts_multi + bodyparts_single
            map_ = dict(zip(bps, range(len(bps))))
            self.tracklet2bp = [map_[bp] for bp in self.bodyparts[::3]]
            self._label_pairs = self.get_label_pairs()
        else:
            tracklets_raw = np.full(
                (len(tracklets_sorted), self.nframes, len(bodyparts)),
                np.nan,
                np.float16,
            )
            for n, tracklet in enumerate(tracklets_sorted[::-1]):
                for frame, data in tracklet.items():
                    i = get_frame_ind(frame)
                    tracklets_raw[n, i] = data
            self.data = (tracklets_raw.swapaxes(0, 1).reshape(
                (self.nframes, -1, 3)).swapaxes(0, 1))
            self.xy = self.data[:, :, :2]
            self.prob = self.data[:, :, 2]
            self.tracklet2id = self.tracklet2bp = [0] * self.data.shape[0]
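
In the gap-filling step of the auto_fill branch, xy holds two columns (x and y) per tracked keypoint while prob holds a single likelihood column per keypoint, so integer-dividing an xy column index by 2 yields the matching prob column; every interpolated point is then flagged with a low confidence of 0.01. A small standalone demonstration of that bookkeeping, using made-up shapes and values:

import numpy as np

nframes, nkeypoints = 5, 3
xy = np.zeros((nframes, nkeypoints * 2))    # columns: x0, y0, x1, y1, x2, y2
prob = np.ones((nframes, nkeypoints))       # columns: p0, p1, p2

xy[2, 2] = np.nan                           # x of keypoint 1 missing in frame 2 ...
missing = np.isnan(xy)
xy[2, 2] = 0.5                              # ... and now filled by interpolation
filled = ~np.isnan(xy)

inds = np.argwhere(missing & filled)        # (frame, xy-column) pairs that were filled
inds[:, 1] //= 2                            # xy columns 2 and 3 both map to keypoint 1
inds = np.unique(inds, axis=0)
prob[inds[:, 0], inds[:, 1]] = 0.01         # downgrade confidence of interpolated points
print(prob[2])                              # [1.   0.01 1.  ]
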
Example #3
    def _load_tracklets(self, tracklets, auto_fill):
        header = tracklets.pop("header")
        self.scorer = header.get_level_values("scorer").unique().to_list()
        frames = sorted(
            set([
                frame for tracklet in tracklets.values() for frame in tracklet
            ]))
        if not len(frames):
            raise IOError("Tracklets are empty.")

        self.nframes = int(re.findall(r"\d+", frames[-1])[0]) + 1
        self.times = np.arange(self.nframes)
        bodyparts = header.get_level_values("bodyparts")
        bodyparts_multi = [
            bp for bp in self.cfg["multianimalbodyparts"] if bp in bodyparts
        ]
        bodyparts_single = self.cfg["uniquebodyparts"]
        mask_multi = bodyparts.isin(bodyparts_multi)
        mask_single = bodyparts.isin(bodyparts_single)
        self.bodyparts = list(
            bodyparts[mask_multi]) * self.nindividuals + list(
                bodyparts[mask_single])

        # Store tracklets, such that we later manipulate long chains
        # rather than data of individual frames, yielding greater continuity.
        tracklets_unsorted = dict()
        for num_tracklet in sorted(tracklets):
            to_fill = np.full((self.nframes, len(bodyparts)), np.nan)
            for frame_name, data in tracklets[num_tracklet].items():
                ind_frame = int(re.findall(r"\d+", frame_name)[0])
                to_fill[ind_frame] = data
            nonempty = np.any(~np.isnan(to_fill), axis=1)
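            # completeness = fraction of frames in which this tracklet has any data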
            completeness = nonempty.sum() / self.nframes
            if completeness >= self.min_tracklet_frac:
                is_single = np.isnan(to_fill[:, mask_multi]).all()
                if is_single:
                    to_fill = to_fill[:, mask_single]
                else:
                    to_fill = to_fill[:, mask_multi]
                if to_fill.size:
                    tracklets_unsorted[
                        num_tracklet] = to_fill, completeness, is_single
        tracklets_sorted = sorted(tracklets_unsorted.items(),
                                  key=lambda kv: kv[1][1])

        if auto_fill:
            # Iteratively fill the data containers
            tracklets_multi = np.full(
                (self.nindividuals, self.nframes, len(bodyparts_multi) * 3),
                np.nan)
            tracklets_single = np.full(
                (self.nframes, len(bodyparts_single) * 3), np.nan)
            while tracklets_sorted:
                _, (data, _, is_single) = tracklets_sorted.pop()
                has_data = ~np.isnan(data)
                if is_single:
                    # Where slots are available, copy the data over
                    is_free = np.isnan(tracklets_single)
                    mask = has_data & is_free
                    tracklets_single[mask] = data[mask]
                    # If about to overwrite data, keep tracklets with highest confidence
                    overwrite = has_data & ~is_free
                    if overwrite.any():
                        rows, cols = np.nonzero(overwrite)
                        more_confident = (data[overwrite] >
                                          tracklets_single[overwrite])[2::3]
                        inds = np.flatnonzero(more_confident)
                        for ind in inds:
                            sl = slice(ind * 3, ind * 3 + 3)
                            sel = rows[sl], cols[sl]
                            tracklets_single[sel] = data[sel]
                else:
                    is_free = np.isnan(tracklets_multi)
                    overwrite = has_data & ~is_free
                    overwrite_risk = np.any(overwrite, axis=(1, 2))
                    if overwrite_risk.all():
                        # Squeeze some data into empty slots
                        mask = has_data & is_free
                        space_left = mask.any(axis=(1, 2))
                        for ind in np.flatnonzero(space_left):
                            current_mask = mask[ind]
                            tracklets_multi[ind,
                                            current_mask] = data[current_mask]
                            has_data[current_mask] = False
                        # For the remaining data, overwrite where we are least confident
                        remaining = data[has_data].reshape((-1, 3))
                        mask3d = np.broadcast_to(
                            has_data, (self.nindividuals, ) + has_data.shape)
                        temp = tracklets_multi[mask3d].reshape(
                            (self.nindividuals, -1, 3))
                        diff = remaining - temp
                        # Find keypoints closest to the remaining data
                        dist = np.sqrt(diff[:, :, 0]**2 + diff[:, :, 1]**2)
                        closest = np.argmin(dist, axis=0)
                        # Only overwrite if improving confidence
                        prob = diff[closest, range(len(closest)), 2]
                        better = np.flatnonzero(prob > 0)
                        inds = closest[better]
                        rows, cols = np.nonzero(has_data)
                        for i, j in zip(inds, better):
                            sl = slice(j * 3, j * 3 + 3)
                            tracklets_multi[i, rows[sl],
                                            cols[sl]] = remaining.flat[sl]
                    else:
                        tracklets_multi[np.argmin(overwrite_risk),
                                        has_data] = data[has_data]

            multi = tracklets_multi.swapaxes(0, 1).reshape((self.nframes, -1))
            data = np.c_[multi, tracklets_single].reshape(
                (self.nframes, -1, 3))
            xy = data[:, :, :2].reshape((self.nframes, -1))
            prob = data[:, :, 2].reshape((self.nframes, -1))

            # Fill existing gaps and slightly smooth the tracklets
            missing = np.isnan(xy)
            xy_filled = columnwise_spline_interp(xy, self.max_gap)
            filled = ~np.isnan(xy_filled)
            xy[filled] = xy_filled[filled]
            inds = np.argwhere(missing & filled)
            if inds.size:
                # Map the filled (x, y) column indices back to their likelihood columns
                inds[:, 1] //= 2
                inds = np.unique(inds, axis=0)
                prob[inds[:, 0], inds[:, 1]] = 0.01
            data[:, :, :2] = xy.reshape((self.nframes, -1, 2))
            data[:, :, 2] = prob
            self.data = data.swapaxes(0, 1)
            self.xy = self.data[:, :, :2]
            self.prob = self.data[:, :, 2]

            # Map each tracklet index to the animal ID it belongs to
            # and to the bodypart index it corresponds to.
            self.individuals = self.cfg["individuals"] + (["single"] if len(
                self.cfg["uniquebodyparts"]) else [])
            self.tracklet2id = [
                i for i in range(0, self.nindividuals) for _ in bodyparts_multi
            ] + [self.nindividuals] * len(bodyparts_single)
            bps = bodyparts_multi + bodyparts_single
            map_ = dict(zip(bps, range(len(bps))))
            self.tracklet2bp = [map_[bp] for bp in self.bodyparts[::3]]
            self._label_pairs = self.get_label_pairs()
        else:
            tracklets_raw = np.full(
                (len(tracklets_sorted), self.nframes, len(bodyparts)), np.nan)
            for n, data in enumerate(tracklets_sorted[::-1]):
                xy = data[1][0]
                tracklets_raw[n, :, :xy.shape[1]] = xy
            self.data = (tracklets_raw.swapaxes(0, 1).reshape(
                (self.nframes, -1, 3)).swapaxes(0, 1))
            self.xy = self.data[:, :, :2]
            self.prob = self.data[:, :, 2]
            self.tracklet2id = self.tracklet2bp = [0] * self.data.shape[0]
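
Both _load_tracklets variants consume the same input structure, inferred here from the way the code uses it: a dict containing a "header" entry (a pandas MultiIndex with scorer, bodyparts and coords levels) plus one sub-dict per tracklet that maps frame labels containing the frame number to flat arrays of (x, y, likelihood) values, one triplet per bodypart. A toy construction of such a dict, with hypothetical names and random values:

import numpy as np
import pandas as pd

scorer = "DLC_resnet50_demo"                # hypothetical scorer name
bodyparts = ["snout", "tailbase"]           # hypothetical bodypart list
header = pd.MultiIndex.from_product(
    [[scorer], bodyparts, ["x", "y", "likelihood"]],
    names=["scorer", "bodyparts", "coords"],
)

rng = np.random.default_rng(0)
tracklets = {
    "header": header,
    # One sub-dict per tracklet: frame label -> flat array with one
    # (x, y, likelihood) triplet per bodypart, in the header's column order.
    0: {f"frame{i:03d}": rng.random(len(bodyparts) * 3) for i in range(0, 3)},
    1: {f"frame{i:03d}": rng.random(len(bodyparts) * 3) for i in range(5, 7)},
}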