def plot_frame(self, ax, X, true_detections, id_colors, frame, with_gt, gt_bbs):
    """ plots the frame with its true detections """
    Y = utils.extract_eq(true_detections, col=0, value=frame)
    X = X[frame]
    ax.imshow(X)
    for _, pid, x, y, w, h, score in Y:
        ax.text(x, y, str(int(pid)), color='white', fontsize=17,
                bbox={'facecolor': 'red', 'alpha': 0.5})
        bbX, bbY = utils.bb_to_plt_plot(x, y, w, h)
        ax.plot(bbX, bbY, linewidth=2, color=id_colors[pid])
    if with_gt:
        Y = utils.extract_eq(gt_bbs, col=0, value=frame)
        for _, pid, x, y, w, h in Y:
            bbX, bbY = utils.bb_to_plt_plot(x, y, w, h)
            ax.plot(bbX, bbY, 'g--', linewidth=4)

# -------------
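# Illustrative usage sketch (not part of the original module): render one
# frame with its detections. `viz` is assumed to be an instance of the class
# that owns plot_frame; `X` is the frame tensor and `Dt`/`Y_gt` are detection
# rows of [frame, pid, x, y, w, h, score] and ground-truth boxes as loaded
# elsewhere in this project.
def _demo_plot_frame(viz, X, Dt, Y_gt):
    import matplotlib.pyplot as plt
    from collections import defaultdict
    fig, ax = plt.subplots(figsize=(16, 9))
    id_colors = defaultdict(lambda: 'red')  # any pid -> color mapping works
    viz.plot_frame(ax, X, Dt, id_colors, frame=0, with_gt=True, gt_bbs=Y_gt)
    plt.show()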
def get_visible_pedestrains(Y_gt):
    """ returns the actual people and drops distractors """
    #Y_gt = utils.extract_eq(Y_gt, col=0, value=frame)
    Y_gt = utils.extract_eq(Y_gt, col=7, value=1)  # class: Pedestrian
    Y_gt = utils.extract_eq(Y_gt, col=8, value=1)  # fully visible
    return Y_gt
def simplify_gt(Y_gt):
    """ simplifies the GT data to only contain
        [frame, pid, x, y, w, h]
        from sensible detections only (Pedestrian + Static person)
    """
    Y_ped = utils.extract_eq(Y_gt, col=7, value=1)  # Pedestrian
    Y_sta = utils.extract_eq(Y_gt, col=7, value=7)  # Static person
    Y = np.vstack([Y_ped, Y_sta])
    Y = utils.extract_eq(Y, col=6, value=1)  # consider-entry flag is set
    Y = utils.extract_eq(Y, col=8, value=1)  # fully visible
    return Y[:, 0:6]
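# Minimal sketch of what simplify_gt consumes (the values are made up):
# MOT16 gt.txt rows follow [frame, id, bb_left, bb_top, bb_width, bb_height,
# flag, class, visibility], with class 1 = Pedestrian and 7 = Static person.
def _demo_simplify_gt():
    Y_gt = np.array([
        [1, 1, 10., 20., 50., 120., 1, 1, 1.],  # pedestrian -> kept
        [1, 2, 40., 25., 45., 110., 1, 7, 1.],  # static person -> kept
        [1, 3, 80., 30., 40., 100., 1, 2, 1.],  # person on vehicle -> dropped
    ])
    Y = simplify_gt(Y_gt)
    assert Y.shape == (2, 6)  # only [frame, pid, x, y, w, h] survive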
def __init__(self, root, target_w, target_h, video="MOT16-02"):
    mot16 = MOT16(root)
    self.target_w = target_w
    self.target_h = target_h
    X, _, Y_gt = mot16.get_train(video, memmapped=True)
    Y_gt = MOT16Sampler.get_visible_pedestrains(Y_gt)  # only humans
    n_frames, h, w, _ = X.shape
    self.lookup = {}
    self.pid_frame_lookup = {}
    self.X = X
    self.n_frames = n_frames
    self.frames_with_persons = []
    self.pids_per_frame = {}
    for f in range(n_frames):
        Gt_per_frame = utils.extract_eq(Y_gt, col=0, value=f)
        pids = Gt_per_frame[:, 1]
        left = Gt_per_frame[:, 2]
        top = Gt_per_frame[:, 3]
        width = Gt_per_frame[:, 4]
        height = Gt_per_frame[:, 5]
        valid_persons = 0
        valid_pids = []
        for pid, x, y, w, h in zip(pids, left, top, width, height):
            # only take bbs that are 'big' enough
            if w > 50 and h > 100:
                pid = int(pid)
                if pid not in self.lookup:
                    self.lookup[pid] = []
                self.lookup[pid].append(f)
                H, W, _ = X[f].shape
                x_left = max(0, int(x))
                y_top = max(0, int(y))
                x_right = min(W - 1, int(x + w))
                y_bottom = min(H - 1, int(y + h))
                self.pid_frame_lookup[pid, f] = \
                    (x_left, y_top, x_right, y_bottom)
                valid_persons += 1
                valid_pids.append(pid)
        self.pids_per_frame[f] = valid_pids
        if valid_persons > 1:
            self.frames_with_persons.append(f)

    # remove all pids that are visible in only a single frame
    del_pids = []  # (pid, frame)
    for pid, visible_in_frames in self.lookup.items():
        assert len(visible_in_frames) > 0
        if len(visible_in_frames) == 1:
            del_pids.append((pid, visible_in_frames[0]))
    for pid, frame in del_pids:
        self.lookup.pop(pid)
        self.pid_frame_lookup.pop((pid, frame))
        valid_pids = self.pids_per_frame[frame]
        self.pids_per_frame[frame] = [p for p in valid_pids if p != pid]
    print('(MOT16) total number of bounding boxes:',
          len(self.pid_frame_lookup))
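# Instantiation sketch (the path and crop size are placeholder assumptions):
# building the sampler scans the whole video once and fills the lookup tables.
def _demo_sampler():
    sampler = MOT16Sampler('/path/to/MOT16', target_w=64, target_h=128,
                           video="MOT16-02")
    # frames that contain at least two valid (large-enough) persons:
    print(len(sampler.frames_with_persons))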
def remove_duplicates(X, score_fun):
    """ runs over the data and keeps, per frame and per id, only the "best"
        entry according to the score function

    X: {np.array} data with shape (n,m): n is the number of data points,
        m is the data dimension. The first element is the frame number,
        the second one the id. The rest is the data term
        e.g.: [
            [frame, pid, ...DATA...],
            ...
        ]

    score_fun: {function} gets a single data point and evaluates a score.
        The data point with the largest score is kept while all others
        are dropped

    returns {np.array} with the same structure as X but with each id
        appearing at most once per frame
    """
    last_frame = int(np.max(X[:, 0]))
    first_frame = int(np.min(X[:, 0]))
    assert last_frame > 0 and first_frame > 0 and last_frame >= first_frame
    X_result = []
    for frame in range(first_frame, last_frame + 1):
        X_frame = utils.extract_eq(X, 0, frame)
        if len(X_frame) > 0:
            lookup = {}
            for x in X_frame:
                pid, score = x[1], score_fun(x)
                if pid not in lookup or score > lookup[pid][0]:
                    lookup[pid] = (score, x)
            for _, x in lookup.values():  # values are (score, x) pairs
                X_result.append(x)
    return np.array(X_result)
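# Tiny self-contained check for remove_duplicates, assuming rows of
# [frame, pid, score]: the duplicated id in frame 1 collapses to its
# highest-scoring row.
def _demo_remove_duplicates():
    X = np.array([
        [1, 1, 0.3],
        [1, 1, 0.9],  # same (frame, pid) as above, higher score -> kept
        [1, 2, 0.5],
        [2, 1, 0.7],
    ])
    X_clean = remove_duplicates(X, score_fun=lambda x: x[2])
    assert len(X_clean) == 3 and 0.3 not in X_clean[:, 2]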
def get_visible_pedestrains_det(Y_det, frame):
    return utils.extract_eq(Y_det, col=0, value=frame)
def get_visible_pedestrains(Y_gt, frame):
    Y_gt_frame1 = utils.extract_eq(Y_gt, col=0, value=frame)
    #Y_gt_frame1 = utils.extract_eq(Y_gt_frame1, col=7, value=1)
    #Y_gt_frame1 = utils.extract_eq(Y_gt_frame1, col=8, value=1)
    return Y_gt_frame1
def evaluate(Gt, Hy, T, calc_cost, debug_info=False):
    """ Runs the Multiple Object Tracking Metrics algorithm

    Gt: Ground-truth: [ [frame, pid, ..DATA..], ... ]
    Hy: hypothesis:   [ [frame, pid, ..DATA..], ... ]
    T: cost threshold after which pairs cannot be connected anymore
    calc_cost: {function} that gets the ..DATA.. term as parameter:
        e.g. for points it could calculate the distance, for aabb's
        it could calculate the IoU..
    debug_info: {boolean} if True we get the actual Gt-Hy pairs that
        created FP, FN and MME

    return:
        fp: List<Integer> of false positives
        m: List<Integer> of misses
        mme: List<Integer> of mismatches
        c: List<Integer> of matches
        d: List<Double> of distances between o_i and h_i (summed)
        g: List<Integer> number of objects in t
        all lists have the same length of total number of frames
    """
    global HIGH_VALUE
    first_frame = np.min(Gt[:, 0])
    last_frame = np.max(Gt[:, 0])
    assert floor(first_frame) == ceil(first_frame)  # make sure the
    assert floor(last_frame) == ceil(last_frame)    # values are integers
    first_frame = int(first_frame)
    last_frame = int(last_frame)
    number_of_frames = last_frame - first_frame + 1

    fp = [0] * number_of_frames
    m = [0] * number_of_frames
    mme = [0] * number_of_frames
    c = [0] * number_of_frames
    d = [0] * number_of_frames
    g = [0] * number_of_frames
    M = MatchLookup(first_frame, last_frame)

    if debug_info:
        FP_pairs = []
        FN_pairs = []
        MME_pairs = []
        Gt_local2global = LocalFrameIdToGlobal(Gt)
        Hy_local2global = LocalFrameIdToGlobal(Hy)

    # ----------------------------
    # "t" is the true frame number that can be any integer
    # "t_pos" is the respective integer that starts at 0 ...
    for t_pos, t in enumerate(range(first_frame, last_frame + 1)):
        Ot = SingleFrameData(extract_eq(Gt, col=0, value=t))
        Ht = SingleFrameData(extract_eq(Hy, col=0, value=t))
        g[t_pos] = Ot.total_elements  # count number of objects in t

        # ----------------------------------
        # verify if old match is still valid!
        # ----------------------------------
        is_empty = True
        for (o, h) in M.get_matches(t - 1):
            oid, hid = o[0], h[0]
            if Ot.has(oid) and Ht.has(hid):
                o_cur = Ot.find(oid)
                h_cur = Ht.find_best(hid, o_cur, calc_cost)
                cost = calc_cost(o_cur[1:], h_cur[1:])
                if cost < T:
                    # the tracked object is still valid! :)
                    Ot.remove(o_cur)
                    Ht.remove(h_cur)
                    M.insert_match(t, o_cur, h_cur)
                    is_empty = False
        if is_empty:
            M.init(t)  # to allow frames with no matches!

        # ----------------------------------
        # match not-yet corresponding pairs
        # ----------------------------------
        Ot_unmatched = Ot.as_list()  # the already matched elements
        Ht_unmatched = Ht.as_list()  # were removed
        count_o, count_h = len(Ot_unmatched), len(Ht_unmatched)
        C = np.ones((count_o, count_h)) * HIGH_VALUE
        for i, o in enumerate(Ot_unmatched):
            for j, h in enumerate(Ht_unmatched):
                cost = calc_cost(o[1:], h[1:])
                C[i, j] = cost if cost < T else HIGH_VALUE

        row_ind, col_ind = linear_sum_assignment(C)
        for i, j in zip(row_ind, col_ind):
            o_cur, h_cur, cost = Ot_unmatched[i], Ht_unmatched[j], C[i, j]
            if cost < T:
                Ot.remove(o_cur)
                Ht.remove(h_cur)
                M.insert_match(t, o_cur, h_cur)
                if M.has_mismatch(t, o_cur, h_cur):
                    mme[t_pos] += 1
                    if debug_info:  # only do this if we want debug infos
                        MME_pairs.append((Gt_local2global.get_true_idx(t, i),
                                          Hy_local2global.get_true_idx(t, j)))

        # ----------------------------------
        # handle unmatched rest
        # ----------------------------------
        c[t_pos] = M.count_matches(t)
        fp[t_pos] = Ht.elements_left
        assert fp[t_pos] >= 0
        if debug_info:  # only do this if we want debug infos
            for _, leftovers in Ht.lookup.items():
                for leftover in leftovers:
                    item = [t]
                    item.extend(leftover)
                    FP_pairs.append(item)
        m[t_pos] = Ot.elements_left
        assert m[t_pos] >= 0
        if debug_info:  # only do this if we want debug infos
            for _, leftovers in Ot.lookup.items():
                for leftover in leftovers:
                    item = [t]
                    item.extend(leftover)
                    FN_pairs.append(item)

        # ----------------------------------
        # calculate cost between all matches
        # ----------------------------------
        cost_sum = 0
        for (o, h) in M.get_matches(t):
            cost_sum += calc_cost(o[1:], h[1:])
        d[t_pos] = cost_sum
    # ----------------------------

    if debug_info:
        return fp, m, mme, c, d, g, \
            np.array(FN_pairs), np.array(FP_pairs), np.array(MME_pairs)
    else:
        return fp, m, mme, c, d, g
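# Usage sketch for evaluate on 2D point tracks (the data is made up):
# Euclidean distance as the cost and an association gate of T=1.0. The
# closing MOTA/MOTP aggregation is the standard CLEAR-MOT formula and is
# added here for illustration only.
def _demo_evaluate():
    Gt = np.array([
        [1, 1, 0.0, 0.0],
        [1, 2, 5.0, 5.0],   # never hypothesised -> counted as a miss
        [2, 1, 0.5, 0.0],
    ])
    Hy = np.array([
        [1, 1, 0.1, 0.0],   # matches Gt id 1 in frame 1
        [2, 1, 0.4, 0.1],   # ... and keeps the match in frame 2
    ])
    cost = lambda a, b: np.linalg.norm(np.asarray(a) - np.asarray(b))
    fp, m, mme, c, d, g = evaluate(Gt, Hy, T=1.0, calc_cost=cost)
    mota = 1.0 - float(sum(m) + sum(fp) + sum(mme)) / sum(g)
    motp = sum(d) / sum(c)
    print('MOTA: %.3f, MOTP: %.3f' % (mota, motp))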
def remove_short_tracks(X, min_length):
    """ very short tracklets are usually a sign of noisy detections and
        should be removed to get more stable results.

    ATTENTION: this function assumes that X has no duplicates! Please
        remove all duplicates prior to using this function

    X: {np.array} data with shape (n,m): n is the number of data points,
        m is the data dimension. The first element is the frame number,
        the second one the id. The rest is the data term
        e.g.: [
            [frame, pid, ...DATA...],
            ...
        ]

    min_length: {Integer} tracklets with a length of min_length or
        shorter are dropped
    """
    last_frame = int(np.max(X[:, 0]))
    first_frame = int(np.min(X[:, 0]))
    assert last_frame > 0 and first_frame > 0 and last_frame >= first_frame

    get_pid = lambda x: int(x[1])
    get_pid_from_tracklet = lambda lst: get_pid(lst[0])

    X_result = []

    # track the currently open tracklet per id
    # structure:
    #   KEY:  pid
    #   DATA: [occurred {Boolean}, data {list}]
    pid_tracklets = {}
    for frame in range(first_frame, last_frame + 1):  # +1: include last frame
        X_frame = utils.extract_eq(X, 0, frame)

        # reset the 'occurrence' flag on all tracks
        for pid in pid_tracklets.keys():
            pid_tracklets[pid][0] = False

        if len(X_frame) > 0:
            lookup = set()
            for x in X_frame:
                pid = get_pid(x)
                assert pid not in lookup, "Frames MUST NOT have duplicate ids"
                lookup.add(pid)
                if pid in pid_tracklets:
                    pid_tracklets[pid][0] = True
                    pid_tracklets[pid][1].append(x)
                else:
                    pid_tracklets[pid] = [True, [x]]

        # check if a track was lost: if so, see if we need to drop it or
        # add it to the true results
        drop_pids = []  # keys that will be dropped from current tracking
        for occurred, tracklet in pid_tracklets.values():
            if not occurred:
                drop_pids.append(get_pid_from_tracklet(tracklet))
                if len(tracklet) > min_length:
                    X_result.extend(tracklet)  # the tracklet is long enough
        for pid in drop_pids:
            del pid_tracklets[pid]

    # finish up all left-over tracks
    for _, tracklet in pid_tracklets.values():
        if len(tracklet) > min_length:
            X_result.extend(tracklet)

    return np.array(X_result)
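# Small sketch for remove_short_tracks on rows of [frame, pid]: pid 2
# appears in a single frame only, so with min_length=1 its tracklet is
# dropped while the three-frame track of pid 1 survives.
def _demo_remove_short_tracks():
    X = np.array([
        [1, 1],
        [1, 2],   # pid 2 only shows up here
        [2, 1],
        [3, 1],
    ])
    X_kept = remove_short_tracks(X, min_length=1)
    assert set(X_kept[:, 1]) == {1}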