def load_tracklets_from_hdf(self, filename):
    """Load tracklets stored in an HDF file and set up the data containers.

    The table read from ``filename`` is split into coordinate columns and
    likelihood columns (those whose "coords" level contains "likelihood").
    Gaps in the coordinates are passed through ``columnwise_spline_interp``
    together with ``self.max_gap``; every keypoint that was missing and is
    no longer missing afterwards gets its likelihood set to 0.01.

    Parameters
    ----------
    filename : str
        Path of the HDF file, handed over to ``pandas.read_hdf``.

    Notes
    -----
    Sets ``filename``, ``scorer``, ``bodyparts``, ``nframes``, ``times``,
    ``data``, ``xy``, ``prob``, ``individuals``, ``tracklet2id``,
    ``tracklet2bp``, ``_label_pairs`` and ``_xy`` on the instance.
    """
    self.filename = filename
    frame_table = pd.read_hdf(filename)
    columns = frame_table.columns

    # Separate the coordinate columns from the likelihood columns.
    values = frame_table.to_numpy()
    coord_cols = ~columns.get_level_values(
        level="coords").str.contains("likelihood")
    likelihood_cols = ~coord_cols
    coords = values[:, coord_cols]
    likelihood = values[:, likelihood_cols]

    # Fill existing gaps
    was_missing = np.isnan(coords)
    interpolated = columnwise_spline_interp(coords, self.max_gap)
    now_valid = ~np.isnan(interpolated)
    coords[now_valid] = interpolated[now_valid]
    newly_filled = np.argwhere(was_missing & now_valid)
    if newly_filled.size:
        # Coordinates come in (x, y) pairs: halving the column index
        # retrieves the index of the keypoint in the likelihood array.
        newly_filled[:, 1] //= 2
        newly_filled = np.unique(newly_filled, axis=0)
        likelihood[newly_filled[:, 0], newly_filled[:, 1]] = 0.01
    values[:, coord_cols] = coords
    values[:, likelihood_cols] = likelihood
    frame_table = pd.DataFrame(
        values, index=frame_table.index, columns=columns)

    self.scorer = columns.get_level_values("scorer").unique().to_list()
    self.bodyparts = columns.get_level_values("bodyparts")
    self.nframes = len(frame_table)
    self.times = np.arange(self.nframes)
    # Arrange the table as (keypoint, frame, [x, y, likelihood]).
    per_frame = frame_table.values.reshape((self.nframes, -1, 3))
    self.data = per_frame.swapaxes(0, 1)
    self.xy = self.data[:, :, :2]
    self.prob = self.data[:, :, 2]

    # Every keypoint spans three columns, hence the stride of 3 below.
    animal_level = columns.get_level_values("individuals")
    self.individuals = animal_level.unique().to_list()
    animal_to_id = {
        name: num for num, name in enumerate(self.individuals)
    }
    self.tracklet2id = animal_level.map(animal_to_id).tolist()[::3]
    unique_bodyparts = self.bodyparts.unique()
    bodypart_to_id = {
        name: num for num, name in enumerate(unique_bodyparts)
    }
    self.tracklet2bp = self.bodyparts.map(bodypart_to_id).tolist()[::3]
    self._label_pairs = list(
        columns.droplevel(["scorer", "coords"]).unique())
    self._xy = self.xy.copy()
def _load_tracklets(self, tracklets, auto_fill):
    """Populate the data containers from a dictionary of tracklets.

    Parameters
    ----------
    tracklets : dict
        Maps tracklet identifiers to ``{frame_name: data_row}``
        dictionaries and additionally holds a ``"header"`` entry, a column
        index exposing the "scorer" and "bodyparts" levels. The header is
        popped, i.e. the dictionary passed in is modified in place. The
        first run of digits in a frame name is taken as the frame index,
        and a data row holds one value per header column, laid out as
        consecutive (x, y, likelihood) triplets.
    auto_fill : bool
        If True, tracklets longer than ``self.min_tracklet_len`` are
        merged, longest first, into one track per individual (plus one for
        the unique bodyparts), and gaps are then filled with
        ``columnwise_spline_interp``. If False, the retained tracklets are
        stored as they are, one after the other.

    Raises
    ------
    IOError
        If ``tracklets`` holds no tracklet, or no frame at all.

    Notes
    -----
    Sets ``scorer``, ``bodyparts``, ``nframes``, ``times``, ``data``,
    ``xy``, ``prob``, ``tracklet2id`` and ``tracklet2bp``; ``individuals``
    and ``_label_pairs`` are only set when ``auto_fill`` is True.
    """
    header = tracklets.pop("header")
    self.scorer = header.get_level_values("scorer").unique().to_list()
    bodyparts = header.get_level_values("bodyparts")
    bodyparts_multi = [
        bp for bp in self.cfg["multianimalbodyparts"] if bp in bodyparts
    ]
    # NOTE(review): unlike the multi-animal bodyparts, the unique ones are
    # not filtered against the header -- confirm this is intended.
    bodyparts_single = self.cfg["uniquebodyparts"]
    mask_multi = bodyparts.isin(bodyparts_multi)
    mask_single = bodyparts.isin(bodyparts_single)
    self.bodyparts = (
        list(bodyparts[mask_multi]) * self.nindividuals
        + list(bodyparts[mask_single])
    )

    # Sort tracklets by length to prioritize greater continuity
    temp = sorted(tracklets.values(), key=len)
    if not temp:
        raise IOError("Tracklets are empty.")

    def get_frame_ind(s):
        # The frame index is the first run of digits in the frame name.
        return int(re.findall(r"\d+", s)[0])

    # Drop tracklets that are too short
    tracklets_sorted = []
    last_frames = []
    for tracklet in temp:
        if tracklet:
            # Take the highest index over all frames rather than the last
            # inserted key, so that the result does not depend on the
            # insertion order of the dictionary.
            last_frames.append(
                max(get_frame_ind(name) for name in tracklet))
        if len(tracklet) > self.min_tracklet_len:
            tracklets_sorted.append(tracklet)
    if not last_frames:
        raise IOError("Tracklets are empty.")
    self.nframes = max(last_frames) + 1
    self.times = np.arange(self.nframes)

    if auto_fill:
        # Recursively fill the data containers
        tracklets_multi = np.full(
            (self.nindividuals, self.nframes, len(bodyparts_multi) * 3),
            np.nan,
            np.float16,
        )
        tracklets_single = np.full(
            (self.nframes, len(bodyparts_single) * 3), np.nan, np.float16)
        for _ in trange(len(tracklets_sorted)):
            # Longest remaining tracklet first.
            tracklet = tracklets_sorted.pop()
            inds, temp = zip(*[(get_frame_ind(k), v)
                               for k, v in tracklet.items()])
            inds = np.asarray(inds)
            data = np.asarray(temp, dtype=np.float16)
            data_single = data[:, mask_single]
            is_multi = np.isnan(data_single).all()
            if not is_multi:
                # Where slots are available, copy the data over
                is_free = np.isnan(tracklets_single[inds])
                has_data = ~np.isnan(data_single)
                mask = has_data & is_free
                rows, cols = np.nonzero(mask)
                tracklets_single[inds[rows], cols] = data_single[mask]
                # If about to overwrite data, keep tracklets with highest
                # confidence
                overwrite = has_data & ~is_free
                if overwrite.any():
                    rows, cols = np.nonzero(overwrite)
                    # Every third value is a likelihood.
                    more_confident = (
                        data_single[overwrite]
                        > tracklets_single[inds[rows], cols])[2::3]
                    idx = np.flatnonzero(more_confident)
                    for i in idx:
                        sl = slice(i * 3, i * 3 + 3)
                        tracklets_single[inds[rows[sl]], cols[sl]] = \
                            data_single[rows[sl], cols[sl]]
            else:
                is_free = np.isnan(tracklets_multi[:, inds])
                data_multi = data[:, mask_multi]
                has_data = ~np.isnan(data_multi)
                overwrite = has_data & ~is_free
                overwrite_risk = np.any(overwrite, axis=(1, 2))
                if overwrite_risk.all():
                    # Squeeze some data into empty slots
                    n_empty = is_free.all(axis=2).sum(axis=1)
                    for ind in np.argsort(n_empty)[::-1]:
                        mask = has_data & is_free
                        current_mask = mask[ind]
                        rows, cols = np.nonzero(current_mask)
                        if rows.size:
                            tracklets_multi[
                                ind, inds[rows],
                                cols] = data_multi[current_mask]
                            is_free[ind, current_mask] = False
                            has_data[current_mask] = False
                    if has_data.any():
                        # For the remaining data, overwrite where we are
                        # least confident
                        remaining = data_multi[has_data].reshape((-1, 3))
                        mask3d = np.broadcast_to(
                            has_data,
                            (self.nindividuals, ) + has_data.shape)
                        dims, rows, cols = np.nonzero(mask3d)
                        temp = tracklets_multi[
                            dims, inds[rows], cols].reshape(
                                (self.nindividuals, -1, 3))
                        diff = remaining - temp
                        # Find keypoints closest to the remaining data
                        # Use Manhattan distance to avoid overflow
                        dist = np.abs(diff[:, :, 0]) + np.abs(diff[:, :, 1])
                        closest = np.argmin(dist, axis=0)
                        # Only overwrite if improving confidence
                        prob = diff[closest, range(len(closest)), 2]
                        better = np.flatnonzero(prob > 0)
                        idx = closest[better]
                        rows, cols = np.nonzero(has_data)
                        for i, j in zip(idx, better):
                            sl = slice(j * 3, j * 3 + 3)
                            tracklets_multi[i, inds[rows[sl]],
                                            cols[sl]] = remaining.flat[sl]
                else:
                    # At least one individual is free wherever this
                    # tracklet has data: hand the tracklet over to it.
                    rows, cols = np.nonzero(has_data)
                    n = np.argmin(overwrite_risk)
                    tracklets_multi[n, inds[rows],
                                    cols] = data_multi[has_data]

        multi = tracklets_multi.swapaxes(0, 1).reshape((self.nframes, -1))
        data = np.c_[multi, tracklets_single].reshape(
            (self.nframes, -1, 3))
        xy = data[:, :, :2].reshape((self.nframes, -1))
        prob = data[:, :, 2].reshape((self.nframes, -1))

        # Fill existing gaps
        missing = np.isnan(xy)
        xy_filled = columnwise_spline_interp(xy, self.max_gap)
        filled = ~np.isnan(xy_filled)
        xy[filled] = xy_filled[filled]
        inds = np.argwhere(missing & filled)
        if inds.size:
            # Retrieve original individual label indices
            inds[:, 1] //= 2
            inds = np.unique(inds, axis=0)
            # Flag the keypoints that were filled in with a low likelihood.
            prob[inds[:, 0], inds[:, 1]] = 0.01
        data[:, :, :2] = xy.reshape((self.nframes, -1, 2))
        data[:, :, 2] = prob
        self.data = data.swapaxes(0, 1)
        self.xy = self.data[:, :, :2]
        self.prob = self.data[:, :, 2]

        # Map each tracklet number to the animal ID it belongs to
        # and to the bodypart it corresponds to.
        self.individuals = self.cfg["individuals"] + (["single"] if len(
            self.cfg["uniquebodyparts"]) else [])
        self.tracklet2id = [
            i for i in range(0, self.nindividuals) for _ in bodyparts_multi
        ] + [self.nindividuals] * len(bodyparts_single)
        bps = bodyparts_multi + bodyparts_single
        map_ = dict(zip(bps, range(len(bps))))
        self.tracklet2bp = [map_[bp] for bp in self.bodyparts[::3]]
        self._label_pairs = self.get_label_pairs()
    else:
        tracklets_raw = np.full(
            (len(tracklets_sorted), self.nframes, len(bodyparts)),
            np.nan,
            np.float16,
        )
        for n, tracklet in enumerate(tracklets_sorted[::-1]):
            for frame, data in tracklet.items():
                i = get_frame_ind(frame)
                tracklets_raw[n, i] = data
        self.data = (tracklets_raw.swapaxes(0, 1).reshape(
            (self.nframes, -1, 3)).swapaxes(0, 1))
        self.xy = self.data[:, :, :2]
        self.prob = self.data[:, :, 2]
        # Two distinct lists, so that editing one mapping in place
        # cannot silently alter the other.
        self.tracklet2id = [0] * self.data.shape[0]
        self.tracklet2bp = [0] * self.data.shape[0]
def _load_tracklets(self, tracklets, auto_fill):
    """Populate the data containers from a dictionary of tracklets.

    Parameters
    ----------
    tracklets : dict
        Maps tracklet identifiers to ``{frame_name: data_row}``
        dictionaries and additionally holds a ``"header"`` entry, a column
        index exposing the "scorer" and "bodyparts" levels. The header is
        popped, i.e. the dictionary passed in is modified in place. The
        first run of digits in a frame name is taken as the frame index,
        and a data row holds one value per header column, laid out as
        consecutive (x, y, likelihood) triplets.
    auto_fill : bool
        If True, the tracklets covering at least a fraction
        ``self.min_tracklet_frac`` of the frames are merged, most complete
        first, into one track per individual (plus one for the unique
        bodyparts), and gaps are then filled with
        ``columnwise_spline_interp``. If False, the retained tracklets are
        stored as they are, one after the other.

    Raises
    ------
    IOError
        If the tracklets contain no frame at all.

    Notes
    -----
    Sets ``scorer``, ``bodyparts``, ``nframes``, ``times``, ``data``,
    ``xy``, ``prob``, ``tracklet2id`` and ``tracklet2bp``; ``individuals``
    and ``_label_pairs`` are only set when ``auto_fill`` is True.
    """
    header = tracklets.pop("header")
    self.scorer = header.get_level_values("scorer").unique().to_list()

    def get_frame_ind(frame_name):
        # The frame index is the first run of digits in the frame name.
        return int(re.findall(r"\d+", frame_name)[0])

    # Compare frames by their numeric index: sorting the names themselves
    # would place e.g. "frame9" after "frame10" and underestimate the
    # number of frames whenever the names are not zero-padded.
    frame_inds = {
        get_frame_ind(frame)
        for tracklet in tracklets.values()
        for frame in tracklet
    }
    if not frame_inds:
        raise IOError("Tracklets are empty.")
    self.nframes = max(frame_inds) + 1
    self.times = np.arange(self.nframes)
    bodyparts = header.get_level_values("bodyparts")
    bodyparts_multi = [
        bp for bp in self.cfg["multianimalbodyparts"] if bp in bodyparts
    ]
    # NOTE(review): unlike the multi-animal bodyparts, the unique ones are
    # not filtered against the header -- confirm this is intended.
    bodyparts_single = self.cfg["uniquebodyparts"]
    mask_multi = bodyparts.isin(bodyparts_multi)
    mask_single = bodyparts.isin(bodyparts_single)
    self.bodyparts = (
        list(bodyparts[mask_multi]) * self.nindividuals
        + list(bodyparts[mask_single])
    )

    # Store tracklets, such that we later manipulate long chains
    # rather than data of individual frames, yielding greater continuity.
    tracklets_unsorted = {}
    for num_tracklet in sorted(tracklets):
        to_fill = np.full((self.nframes, len(bodyparts)), np.nan)
        for frame_name, data in tracklets[num_tracklet].items():
            to_fill[get_frame_ind(frame_name)] = data
        nonempty = np.any(~np.isnan(to_fill), axis=1)
        completeness = nonempty.sum() / self.nframes
        if completeness >= self.min_tracklet_frac:
            # A tracklet without any multi-animal bodypart is treated as
            # one made of unique bodyparts only.
            is_single = np.isnan(to_fill[:, mask_multi]).all()
            if is_single:
                to_fill = to_fill[:, mask_single]
            else:
                to_fill = to_fill[:, mask_multi]
            if to_fill.size:
                tracklets_unsorted[
                    num_tracklet] = to_fill, completeness, is_single
    # Ascending completeness; the most complete tracklet is popped first.
    tracklets_sorted = sorted(tracklets_unsorted.items(),
                              key=lambda kv: kv[1][1])

    if auto_fill:
        # Recursively fill the data containers
        tracklets_multi = np.full(
            (self.nindividuals, self.nframes, len(bodyparts_multi) * 3),
            np.nan)
        tracklets_single = np.full(
            (self.nframes, len(bodyparts_single) * 3), np.nan)
        while tracklets_sorted:
            _, (data, _, is_single) = tracklets_sorted.pop()
            has_data = ~np.isnan(data)
            if is_single:
                # Where slots are available, copy the data over
                is_free = np.isnan(tracklets_single)
                mask = has_data & is_free
                tracklets_single[mask] = data[mask]
                # If about to overwrite data, keep tracklets with highest
                # confidence
                overwrite = has_data & ~is_free
                if overwrite.any():
                    rows, cols = np.nonzero(overwrite)
                    # Every third value is a likelihood.
                    more_confident = (
                        data[overwrite] > tracklets_single[overwrite]
                    )[2::3]
                    for ind in np.flatnonzero(more_confident):
                        sl = slice(ind * 3, ind * 3 + 3)
                        sel = rows[sl], cols[sl]
                        tracklets_single[sel] = data[sel]
            else:
                is_free = np.isnan(tracklets_multi)
                overwrite = has_data & ~is_free
                overwrite_risk = np.any(overwrite, axis=(1, 2))
                if overwrite_risk.all():
                    # Squeeze some data into empty slots
                    mask = has_data & is_free
                    space_left = mask.any(axis=(1, 2))
                    for ind in np.flatnonzero(space_left):
                        # Leave out what was already handed over to a
                        # previous individual, so that the same data are
                        # never copied into several individuals.
                        current_mask = mask[ind] & has_data
                        tracklets_multi[ind,
                                        current_mask] = data[current_mask]
                        has_data[current_mask] = False
                    if has_data.any():
                        # For the remaining data, overwrite where we are
                        # least confident
                        remaining = data[has_data].reshape((-1, 3))
                        mask3d = np.broadcast_to(
                            has_data,
                            (self.nindividuals, ) + has_data.shape)
                        temp = tracklets_multi[mask3d].reshape(
                            (self.nindividuals, -1, 3))
                        diff = remaining - temp
                        # Find keypoints closest to the remaining data
                        dist = np.sqrt(diff[:, :, 0]**2 + diff[:, :, 1]**2)
                        closest = np.argmin(dist, axis=0)
                        # Only overwrite if improving confidence
                        prob = diff[closest, range(len(closest)), 2]
                        better = np.flatnonzero(prob > 0)
                        inds = closest[better]
                        rows, cols = np.nonzero(has_data)
                        for i, j in zip(inds, better):
                            sl = slice(j * 3, j * 3 + 3)
                            tracklets_multi[i, rows[sl],
                                            cols[sl]] = remaining.flat[sl]
                else:
                    # At least one individual is free wherever this
                    # tracklet has data: hand the tracklet over to it.
                    tracklets_multi[np.argmin(overwrite_risk),
                                    has_data] = data[has_data]

        multi = tracklets_multi.swapaxes(0, 1).reshape((self.nframes, -1))
        data = np.c_[multi, tracklets_single].reshape(
            (self.nframes, -1, 3))
        xy = data[:, :, :2].reshape((self.nframes, -1))
        prob = data[:, :, 2].reshape((self.nframes, -1))

        # Fill existing gaps and slightly smooth the tracklets
        missing = np.isnan(xy)
        xy_filled = columnwise_spline_interp(xy, self.max_gap)
        filled = ~np.isnan(xy_filled)
        xy[filled] = xy_filled[filled]
        inds = np.argwhere(missing & filled)
        if inds.size:
            # Retrieve original individual label indices
            inds[:, 1] //= 2
            inds = np.unique(inds, axis=0)
            # Flag the keypoints that were filled in with a low likelihood.
            prob[inds[:, 0], inds[:, 1]] = 0.01
        data[:, :, :2] = xy.reshape((self.nframes, -1, 2))
        data[:, :, 2] = prob
        self.data = data.swapaxes(0, 1)
        self.xy = self.data[:, :, :2]
        self.prob = self.data[:, :, 2]

        # Map each tracklet number to the animal ID it belongs to
        # and to the bodypart it corresponds to.
        self.individuals = self.cfg["individuals"] + (["single"] if len(
            self.cfg["uniquebodyparts"]) else [])
        self.tracklet2id = [
            i for i in range(0, self.nindividuals) for _ in bodyparts_multi
        ] + [self.nindividuals] * len(bodyparts_single)
        bps = bodyparts_multi + bodyparts_single
        map_ = dict(zip(bps, range(len(bps))))
        self.tracklet2bp = [map_[bp] for bp in self.bodyparts[::3]]
        self._label_pairs = self.get_label_pairs()
    else:
        tracklets_raw = np.full(
            (len(tracklets_sorted), self.nframes, len(bodyparts)), np.nan)
        for n, data in enumerate(tracklets_sorted[::-1]):
            xy = data[1][0]
            # The stored arrays only hold the multi-animal or the unique
            # columns, hence may be narrower than the raw container.
            tracklets_raw[n, :, :xy.shape[1]] = xy
        self.data = (tracklets_raw.swapaxes(0, 1).reshape(
            (self.nframes, -1, 3)).swapaxes(0, 1))
        self.xy = self.data[:, :, :2]
        self.prob = self.data[:, :, 2]
        # Two distinct lists, so that editing one mapping in place
        # cannot silently alter the other.
        self.tracklet2id = [0] * self.data.shape[0]
        self.tracklet2bp = [0] * self.data.shape[0]