def plot_speed(root, id):
    detections = group_by_id(
        parse_annotations_from_txt(os.path.join(root, 'gt', 'gt.txt')))
    H = read_calibration(os.path.join(root, 'calibration.txt'))
    cap = cv2.VideoCapture(os.path.join(root, 'vdo.avi'))
    fps = cap.get(cv2.CAP_PROP_FPS)

    track_3d = []
    for det in sorted(detections[id], key=lambda det: det.frame):
        u, v = (det.xtl + det.xbr) / 2, det.ybr  # bottom center
        lat, lon = image2world(u, v, H)  # backproject to obtain latitude/longitude in degrees
        lat, lon = degrees2meters(lat, lon)  # convert degrees to meters
        track_3d.append(np.array([lat, lon]))

        cap.set(cv2.CAP_PROP_POS_FRAMES, det.frame)
        ret, img = cap.read()
        if len(track_3d) >= 5:
            img = cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                (int(det.xbr), int(det.ybr)), (0, 255, 0), 2)
            speed = magnitude(estimate_speed(np.array(track_3d[-5:]), fps))
            img = cv2.putText(img, f'{speed * 3.6:.2f} km/h',
                              (int(det.xtl), int(det.ytl) - 10),
                              cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)
        cv2.imshow('tracks', cv2.resize(img, (960, 540)))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    track_3d = np.array(track_3d)
    speed = estimate_speed(track_3d, fps)
    print(f'id: {id}, avg speed: ({speed[0] * 3.6:.2f}, {speed[1] * 3.6:.2f}) km/h')
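# NOTE: `image2world`, `degrees2meters`, `estimate_speed` and `magnitude` are
# imported from elsewhere in the project. A minimal sketch of what the two
# speed helpers are assumed to do, given that the track is an (N, 2) array of
# positions in meters and the caller converts the result from m/s to km/h with
# the * 3.6 factor above (an assumption, not the project's implementation):
#
#   def estimate_speed(track, fps):
#       # mean per-frame displacement (meters) scaled by fps -> (vx, vy) in m/s
#       return np.diff(track, axis=0).mean(axis=0) * fps
#
#   def magnitude(v):
#       return np.linalg.norm(v)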
def generate_crops(root, save_path, width, height):
    for cam in os.listdir(root):
        detections_by_frame = group_by_frame(
            parse_annotations_from_txt(
                os.path.join(root, cam, 'gt', 'gt.txt')))
        cap = cv2.VideoCapture(os.path.join(root, cam, 'vdo.avi'))
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        for _ in tqdm(range(length), desc=cam):
            frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
            _, img = cap.read()
            if frame not in detections_by_frame:
                continue

            for det in detections_by_frame[frame]:
                # keep only detections at least as large as the target crop size
                if det.width >= width and det.height >= height:
                    id_path = os.path.join(save_path, str(det.id))
                    os.makedirs(id_path, exist_ok=True)
                    roi = img[int(det.ytl):int(det.ybr), int(det.xtl):int(det.xbr)]
                    resized = cv2.resize(roi, (width, height))
                    cv2.imwrite(os.path.join(id_path, f'{cam}_{frame}.png'),
                                resized)
def test_encoder(metric='euclidean'):
    root = '../../../data/AIC20_track3/train/S03'
    cams = ['c010', 'c011', 'c012', 'c013', 'c014', 'c015']

    detections = {}
    cap = {}
    for cam in cams:
        frame_detections = defaultdict(list)
        for det in parse_annotations_from_txt(
                os.path.join(root, cam, 'mtsc', 'mtsc_tc_mask_rcnn.txt')):
            if det.height >= 128 and det.width >= 128:
                frame_detections[det.frame].append(det)
        detections[cam] = frame_detections
        cap[cam] = cv2.VideoCapture(os.path.join(root, cam, 'vdo.avi'))

    def random_detection(cam=None, id=None):
        if cam is None:
            cam = np.random.choice(cams)
        if id is None:
            frame = np.random.choice(list(detections[cam].keys()))
            det = np.random.choice(detections[cam][frame])
        else:
            for frame in np.random.permutation(list(detections[cam].keys())):
                found = False
                for det in detections[cam][frame]:
                    if det.id == id:
                        found = True
                        break
                if found:
                    break
            else:
                raise ValueError(f'id {id} not found in cam {cam}')
        cap[cam].set(cv2.CAP_PROP_POS_FRAMES, det.frame)
        ret, img = cap[cam].read()
        img = img[int(det.ytl):int(det.ybr), int(det.xtl):int(det.xbr)]
        return img, (cam, det.id)

    encoder = Encoder(path='../metric_learning/checkpoints/epoch_19__ckpt.pth')
    print(encoder)
    encoder.eval()

    # a hand-picked cross-camera pair followed by a fully random pair
    pairs = [(('c010', 15), ('c011', 29)), None]
    for p in pairs:
        if p is not None:
            img1, info1 = random_detection(*p[0])
            img2, info2 = random_detection(*p[1])
        else:
            img1, info1 = random_detection()
            img2, info2 = random_detection()

        embd1 = encoder.get_embedding(img1)
        embd2 = encoder.get_embedding(img2)
        dist = paired_distances([embd1], [embd2], metric=metric).squeeze()
        print(dist)

        cv2.imshow('{}:{}'.format(*info1), img1)
        cv2.imshow('{}:{}'.format(*info2), img2)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
def reid_exhaustive(root, seq, model_path, metric='euclidean'):
    seq_path = os.path.join(root, 'train', seq)
    cams = sorted(os.listdir(seq_path))

    # read data
    tracks_by_cam = {
        cam: group_by_id(
            parse_annotations_from_txt(
                os.path.join(seq_path, cam, 'mtsc', 'mtsc_tc_mask_rcnn.txt')))
        for cam in cams
    }
    cap = {
        cam: cv2.VideoCapture(os.path.join(seq_path, cam, 'vdo.avi'))
        for cam in cams
    }

    # filter out static tracks
    for cam in cams:
        tracks_by_cam[cam] = dict(
            filter(lambda x: not is_static(x[1]), tracks_by_cam[cam].items()))

    # initialize encoder
    encoder = Encoder(path=model_path)
    encoder.eval()

    # compute all embeddings (cached on disk per model and sequence)
    model_name = os.path.splitext(os.path.basename(model_path))[0]
    embeddings_file = os.path.join('./embeddings', f'{model_name}_{seq}.pkl')
    if os.path.exists(embeddings_file):
        with open(embeddings_file, 'rb') as f:
            embeddings = pickle.load(f)
    else:
        embeddings = get_track_embeddings(tracks_by_cam, cap, encoder,
                                          save_path=embeddings_file)
    embeddings = {(cam, id): embd
                  for cam in embeddings
                  for id, embd in embeddings[cam].items()}

    # cluster embeddings to associate tracks across cameras
    # (note: DBSCAN labels unclustered tracks as -1, so all of them end up in
    # a single group and therefore share one global id)
    clustering = DBSCAN(eps=0.3, min_samples=2, metric=metric)
    clustering.fit(np.stack(list(embeddings.values())))
    groups = defaultdict(list)
    for id, label in zip(embeddings.keys(), clustering.labels_):
        groups[label].append(id)
    groups = list(groups.values())

    # relabel detections with the global id of their group
    results = defaultdict(list)
    for global_id, group in enumerate(groups):
        for cam, id in group:
            track = tracks_by_cam[cam][id]
            for det in track:
                det.id = global_id
            results[cam].append(track)

    return results
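# NOTE: `is_static` comes from elsewhere in the project. A sketch of the kind
# of check it is assumed to perform, flagging parked cars whose bounding-box
# centers barely move over the whole track (the 50-pixel threshold is an
# illustrative guess, not the project's actual value):
#
#   def is_static(track, thresh=50):
#       centers = np.array([[(d.xtl + d.xbr) / 2, (d.ytl + d.ybr) / 2]
#                           for d in track])
#       return np.std(centers, axis=0).max() < thresh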
def plot_tracks(root, global_id=False):
    if global_id:
        annotations_file = os.path.join(root, 'gt', 'gt.txt')
    else:
        annotations_file = os.path.join(root, 'mtsc', 'mtsc_tc_mask_rcnn.txt')

    detections = group_by_frame(parse_annotations_from_txt(annotations_file))
    cap = cv2.VideoCapture(os.path.join(root, 'vdo.avi'))

    for frame in sorted(detections.keys()):
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
        ret, img = cap.read()

        img = draw_detections(img, detections[frame])
        cv2.imshow('tracks', cv2.resize(img, (960, 540)))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def plot_sync(root, seq, cam1, cam2):
    dets = {
        cam: group_by_frame(
            parse_annotations_from_txt(
                os.path.join(root, 'train', seq, cam, 'mtsc',
                             'mtsc_tc_mask_rcnn.txt')))
        for cam in [cam1, cam2]
    }
    cap = {
        cam: cv2.VideoCapture(os.path.join(root, 'train', seq, cam, 'vdo.avi'))
        for cam in [cam1, cam2]
    }
    fps = {cam: cap[cam].get(cv2.CAP_PROP_FPS) for cam in [cam1, cam2]}
    timestamp = read_timestamps(
        os.path.join(root, 'cam_timestamp', f'{seq}.txt'))

    # compute camera overlap in time
    start_time = max(timestamp[cam1] + min(dets[cam1].keys()) / fps[cam1],
                     timestamp[cam2] + min(dets[cam2].keys()) / fps[cam2])
    end_time = min(timestamp[cam1] + max(dets[cam1].keys()) / fps[cam1],
                   timestamp[cam2] + max(dets[cam2].keys()) / fps[cam2])

    # step through the overlap at the rate of the faster camera
    for t in np.arange(start_time, end_time, min(1 / fps[cam1], 1 / fps[cam2])):
        frame1 = int(round((t - timestamp[cam1]) * fps[cam1]))
        frame2 = int(round((t - timestamp[cam2]) * fps[cam2]))
        print(f'{t:.3f}, {frame1}, {frame2}')

        cap[cam1].set(cv2.CAP_PROP_POS_FRAMES, frame1)
        _, img1 = cap[cam1].read()
        img1 = draw_detections(img1, dets[cam1].get(frame1, []))
        cv2.imshow(cam1, cv2.resize(img1, (960, 540)))

        cap[cam2].set(cv2.CAP_PROP_POS_FRAMES, frame2)
        _, img2 = cap[cam2].read()
        img2 = draw_detections(img2, dets[cam2].get(frame2, []))
        cv2.imshow(cam2, cv2.resize(img2, (960, 540)))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def plot_timeline(root, seq, id):
    timestamps = read_timestamps(
        os.path.join(root, 'cam_timestamp', f'{seq}.txt'))

    ranges = {}
    for cam in timestamps.keys():
        detections = group_by_id(
            parse_annotations_from_txt(
                os.path.join(root, 'train', seq, cam, 'gt', 'gt.txt')))
        if id in detections:
            cap = cv2.VideoCapture(
                os.path.join(root, 'train', seq, cam, 'vdo.avi'))
            fps = cap.get(cv2.CAP_PROP_FPS)
            id_detections = sorted(detections[id], key=lambda det: det.frame)
            start_time = timestamps[cam] + id_detections[0].frame / fps
            end_time = timestamps[cam] + id_detections[-1].frame / fps
            ranges[cam] = (start_time, end_time)

    plt.barh(range(len(ranges)),
             [end - start for start, end in ranges.values()],
             left=[start for start, _ in ranges.values()])
    plt.yticks(range(len(ranges)), list(ranges.keys()))
    plt.title(f'id = {id}')
    plt.show()
def reid_graph(root, seq, model_path, metric='euclidean', thresh=20):
    seq_path = os.path.join(root, 'train', seq)
    cams = sorted(os.listdir(seq_path))

    # read data
    tracks_by_cam = {
        cam: group_by_id(
            parse_annotations_from_txt(
                os.path.join(seq_path, cam, 'mtsc', 'mtsc_tc_mask_rcnn.txt')))
        for cam in cams
    }
    cap = {
        cam: cv2.VideoCapture(os.path.join(seq_path, cam, 'vdo.avi'))
        for cam in cams
    }
    fps = {cam: cap[cam].get(cv2.CAP_PROP_FPS) for cam in cams}
    H = {
        cam: read_calibration(os.path.join(seq_path, cam, 'calibration.txt'))
        for cam in cams
    }
    timestamp = read_timestamps(
        os.path.join(root, 'cam_timestamp', f'{seq}.txt'))

    # filter out static tracks
    for cam in cams:
        tracks_by_cam[cam] = dict(
            filter(lambda x: not is_static(x[1]), tracks_by_cam[cam].items()))

    # initialize encoder
    encoder = Encoder(path=model_path)
    encoder.eval()

    # compute all embeddings (cached on disk per model and sequence)
    model_name = os.path.splitext(os.path.basename(model_path))[0]
    embeddings_file = os.path.join('./embeddings', f'{model_name}_{seq}.pkl')
    if os.path.exists(embeddings_file):
        with open(embeddings_file, 'rb') as f:
            embeddings = pickle.load(f)
    else:
        embeddings = get_track_embeddings(tracks_by_cam, cap, encoder,
                                          save_path=embeddings_file)

    # build a graph whose edges link spatio-temporally compatible tracks
    # with a small enough embedding distance
    G = nx.Graph()
    for cam1 in cams:
        for id1, track1 in tracks_by_cam[cam1].items():
            track1.sort(key=lambda det: det.frame)
            # direction of motion over (at most) the last second of the track
            dir1 = bbox2gps(track1[-1].bbox, H[cam1]) - bbox2gps(
                track1[-min(int(fps[cam1]), len(track1) - 1)].bbox, H[cam1])
            range1 = time_range(track1, timestamp[cam1], fps[cam1])

            candidates = []
            for cam2 in cams:
                if cam2 == cam1:
                    continue

                if angle_to_cam(track1, H[cam1], cam2) < 45:  # going towards the camera
                    for id2, track2 in tracks_by_cam[cam2].items():
                        track2.sort(key=lambda det: det.frame)
                        # direction of motion over (at most) the first second of the track
                        dir2 = bbox2gps(
                            track2[min(int(fps[cam2]), len(track2) - 1)].bbox,
                            H[cam2]) - bbox2gps(track2[0].bbox, H[cam2])
                        range2 = time_range(track2, timestamp[cam2], fps[cam2])

                        if range2[0] >= range1[0]:  # car is detected later in the second camera
                            if angle(dir1, dir2) < 15:  # tracks have a similar direction
                                candidates.append((cam2, id2))

            if len(candidates) > 0:
                dist = pairwise_distances(
                    [embeddings[cam1][id1]],
                    [embeddings[cam2][id2] for cam2, id2 in candidates],
                    metric).flatten()
                ind = dist.argmin()
                if dist[ind] < thresh:
                    cam2, id2 = candidates[ind]
                    G.add_edge((cam1, id1), (cam2, id2))

    # greedily extract maximal cliques; each clique becomes one global identity
    groups = []
    while G.number_of_nodes() > 0:
        cliques = nx.find_cliques(G)
        maximal = max(cliques, key=len)
        groups.append(maximal)
        G.remove_nodes_from(maximal)

    results = defaultdict(list)
    for global_id, group in enumerate(groups):
        for cam, id in group:
            track = tracks_by_cam[cam][id]
            for det in track:
                det.id = global_id
            results[cam].append(track)

    return results
def reid_spatiotemporal(root, seq, model_path, metric='euclidean', thresh=20):
    seq_path = os.path.join(root, 'train', seq)
    cams = sorted(os.listdir(seq_path))

    # read data
    tracks_by_cam = {
        cam: group_in_tracks(
            parse_annotations_from_txt(
                os.path.join(seq_path, cam, 'mtsc', 'mtsc_tc_mask_rcnn.txt')),
            cam)
        for cam in cams
    }
    cap = {
        cam: cv2.VideoCapture(os.path.join(seq_path, cam, 'vdo.avi'))
        for cam in cams
    }
    fps = {cam: cap[cam].get(cv2.CAP_PROP_FPS) for cam in cams}
    H = {
        cam: read_calibration(os.path.join(seq_path, cam, 'calibration.txt'))
        for cam in cams
    }
    timestamp = read_timestamps(
        os.path.join(root, 'cam_timestamp', f'{seq}.txt'))

    # filter out static tracks
    for cam in cams:
        tracks_by_cam[cam] = dict(
            filter(lambda x: not is_static(x[1].detections),
                   tracks_by_cam[cam].items()))

    # initialize encoder
    encoder = Encoder(path=model_path)
    encoder.eval()

    # compute all embeddings (cached on disk per model and sequence)
    model_name = os.path.splitext(os.path.basename(model_path))[0]
    embeddings_file = os.path.join('./embeddings', f'{model_name}_{seq}.pkl')
    if os.path.exists(embeddings_file):
        with open(embeddings_file, 'rb') as f:
            embeddings = pickle.load(f)
    else:
        embeddings = get_track_embeddings(tracks_by_cam, cap, encoder,
                                          save_path=embeddings_file)

    # link each track to its best spatio-temporally compatible match
    for cam1 in cams:
        for id1, track1 in tracks_by_cam[cam1].items():
            dets1 = sorted(track1.detections, key=lambda det: det.frame)
            # direction of motion over (at most) the last second of the track
            dir1 = bbox2gps(dets1[-1].bbox, H[cam1]) - bbox2gps(
                dets1[-min(int(fps[cam1]), len(dets1) - 1)].bbox, H[cam1])
            range1 = time_range(dets1, timestamp[cam1], fps[cam1])

            candidates = []
            for cam2 in cams:
                if cam2 == cam1:
                    continue

                if angle_to_cam(dets1, H[cam1], cam2) < 45:  # going towards the camera
                    for id2, track2 in tracks_by_cam[cam2].items():
                        dets2 = sorted(track2.detections, key=lambda det: det.frame)
                        # direction of motion over (at most) the first second of the track
                        dir2 = bbox2gps(
                            dets2[min(int(fps[cam2]), len(dets2) - 1)].bbox,
                            H[cam2]) - bbox2gps(dets2[0].bbox, H[cam2])
                        range2 = time_range(dets2, timestamp[cam2], fps[cam2])

                        if range2[0] >= range1[0]:  # car is detected later in the second camera
                            if angle(dir1, dir2) < 15:  # tracks have a similar direction
                                if not track2.prev_track and not track1.next_track:
                                    # track has not been previously matched to
                                    # another track from the same direction
                                    candidates.append((cam2, id2))

            if len(candidates) > 0:
                dist = pairwise_distances(
                    [embeddings[cam1][id1]],
                    [embeddings[cam2][id2] for cam2, id2 in candidates],
                    metric).flatten()
                ind = dist.argmin()
                if dist[ind] < thresh:
                    # merge matched tracks
                    cam2, id2 = candidates[ind]
                    tracks_by_cam[cam1][id1].set_next_track((cam2, id2))
                    tracks_by_cam[cam2][id2].set_prev_track((cam1, id1))

    # tracks that start a chain: they have a successor but no predecessor
    starting_tracks = []
    for cam, tracks in tracks_by_cam.items():
        for id, track in tracks.items():
            if track.next_track and not track.prev_track:
                starting_tracks.append(track)

    # propagate ids through tracks connected to starting tracks
    results = defaultdict(list)
    for global_id, track in enumerate(starting_tracks):
        track.id = global_id
        results[track.camera].append(track)

        next_track = track.next_track
        while next_track:
            cam, id = next_track
            track = tracks_by_cam[cam][id]
            track.id = global_id
            results[cam].append(track)
            next_track = track.next_track

    return results
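# NOTE: `angle`, `angle_to_cam` and `bbox2gps` are geometric helpers defined in
# another module of the project. A sketch of what `angle` is assumed to return
# (the angle between two 2D direction vectors, in degrees, consistent with the
# < 45 and < 15 thresholds used above):
#
#   def angle(u, v):
#       cos = np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v) + 1e-12)
#       return np.degrees(np.arccos(np.clip(cos, -1.0, 1.0)))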
        for line in lines:
            file.write(','.join(map(str, line)) + '\n')


if __name__ == '__main__':
    root = '../../../data/AIC20_track3'
    seq = 'S03'

    results = reid(
        root, seq,
        model_path='../metric_learning/checkpoints/epoch_19__ckpt.pth',
        method='graph')
    write_results(results, path=os.path.join('results', seq))

    accumulator = MOTAcumulator()
    for cam in os.listdir(os.path.join(root, 'train', seq)):
        dets_true = group_by_frame(
            parse_annotations_from_txt(
                os.path.join(root, 'train', seq, cam, 'gt', 'gt.txt')))
        dets_pred = group_by_frame(
            parse_annotations_from_txt(
                os.path.join('results', seq, cam, 'results.txt')))
        for frame in dets_true.keys():
            y_true = dets_true.get(frame, [])
            y_pred = dets_pred.get(frame, [])
            accumulator.update(y_true, y_pred)

    # print(f'IDF1: {accumulator.get_idf1()}')
    print(accumulator.get_metrics())
    cam_dir = np.array(CAMERA_LOCATION[cam]) - bbox2gps(track[-num_frames].bbox, H)
    return angle(speed_dir, cam_dir)


def time_range(track, timestamp, fps):
    """Track should be sorted by frame."""
    # track.sort(key=lambda det: det.frame)
    start_time = timestamp + track[0].frame / fps
    end_time = timestamp + track[-1].frame / fps
    return start_time, end_time


if __name__ == '__main__':
    import os
    from utils.aicity_reader import parse_annotations_from_txt, group_by_id

    cam = 'c012'
    root = os.path.join('../../../data/AIC20_track3/train/S03', cam)
    detections = group_by_id(
        parse_annotations_from_txt(os.path.join(root, 'gt', 'gt.txt')))
    H = read_calibration(os.path.join(root, 'calibration.txt'))

    id = np.random.choice(list(detections.keys()))
    track = sorted(detections[id], key=lambda det: det.frame)
    for c in [f'c{c:03d}' for c in range(10, 16)]:
        if c != cam:
            a = angle_to_cam(track, H, c)
            print(f'{c}, {id}, {a:.2f}', 'going' if a < 90 else 'coming')