import os
import time
from collections import defaultdict

import cv2
import imageio
import numpy as np
import matplotlib.pyplot as plt
import torch
from torchvision import transforms
from torchvision.models import detection
from tqdm import tqdm, trange

# Project-local helpers (AICityChallengeAnnotationReader, mean_average_precision,
# video_iou_plot, Detection, Sort, MOTAcumulator, SingleGaussianBackgroundModel,
# postprocess, bounding_boxes, get_nms, sota_bg_subtractor, update_tracks_by_overlap,
# remove_static_tracks, group_by_frame) are assumed to be imported from the
# repository's own modules; the exact import paths depend on the repo layout.


def task1_1(save_path=None):
    reader = AICityChallengeAnnotationReader(path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])

    # add probability to delete bounding boxes
    drop_values = np.linspace(0, 1, 11)
    maps = []
    for drop in drop_values:
        noise_params = {'drop': drop, 'mean': 0, 'std': 0}
        gt_noisy = reader.get_annotations(classes=['car'], noise_params=noise_params)

        y_true = []
        y_pred = []
        for frame in gt.keys():
            y_true.append(gt.get(frame))
            y_pred.append(gt_noisy.get(frame, []))

        map_value, _, _ = mean_average_precision(y_true, y_pred)  # avoid shadowing the builtin map
        maps.append(map_value)

    plt.figure()
    plt.plot(drop_values, maps)
    plt.xticks(drop_values)
    plt.xlabel('drop prob')
    plt.ylabel('mAP')
    if save_path is not None:
        # save before show(), since show() leaves an empty figure behind
        plt.savefig(os.path.join(save_path, 'map_drop_bbox.png'))
    plt.show()

    # add noise to the size and position of bounding boxes
    std_values = np.linspace(0, 100, 11)
    maps = []
    for std in std_values:
        noise_params = {'drop': 0, 'mean': 0, 'std': std}
        gt_noisy = reader.get_annotations(classes=['car'], noise_params=noise_params)

        y_true = []
        y_pred = []
        for frame in gt.keys():
            y_true.append(gt.get(frame))
            y_pred.append(gt_noisy.get(frame, []))

        map_value, _, _ = mean_average_precision(y_true, y_pred)
        maps.append(map_value)

    plt.figure()
    plt.plot(std_values, maps)
    plt.xticks(std_values)
    plt.xlabel('std')
    plt.ylabel('mAP')
    if save_path is not None:
        plt.savefig(os.path.join(save_path, 'map_noisy_bbox.png'))
    plt.show()

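# `noise_params` is consumed inside get_annotations; for reference, a minimal
# sketch of the perturbation it presumably applies (assumption: each GT box is
# dropped with probability `drop` and surviving corners are jittered with
# Gaussian noise N(mean, std); perturb_boxes_sketch is illustrative, not the
# repo's API):
def perturb_boxes_sketch(boxes, drop, mean, std):
    """boxes: list of (xtl, ytl, xbr, ybr) tuples."""
    noisy = []
    for box in boxes:
        if np.random.random() < drop:
            continue  # simulate a missed annotation
        noisy.append(tuple(c + np.random.normal(mean, std) for c in box))
    return noisy
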
def task2(start=0, length=100, save_path=None):
    reader = AICityChallengeAnnotationReader(path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    gt = {frame: gt[frame] for frame in range(start, start + length)}

    noise_params = {'drop': 0.05, 'mean': 0, 'std': 10}
    gt_noisy = reader.get_annotations(classes=['car'], noise_params=noise_params)
    gt_noisy = {frame: gt_noisy[frame] for frame in range(start, start + length)}

    video_iou_plot(gt, gt_noisy,
                   video_path='data/AICity_data/train/S03/c010/vdo.avi',
                   title='noisy annotations',
                   save_path=save_path)

    for detector in ['mask_rcnn', 'ssd512', 'yolo3']:
        reader = AICityChallengeAnnotationReader(path=f'data/AICity_data/train/S03/c010/det/det_{detector}.txt')
        det = reader.get_annotations(classes=['car'])
        det = {frame: det[frame] for frame in range(start, start + length)}

        video_iou_plot(gt, det,
                       video_path='data/AICity_data/train/S03/c010/vdo.avi',
                       title=f'{detector} detections',
                       save_path=save_path)

def task2_2(debug=False, det_path='data/AICity_data/train/S03/c010/det/det_mask_rcnn.txt'):
    """Object tracking: tracking with a Kalman filter."""

    reader = AICityChallengeAnnotationReader(path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(path=det_path)
    dets = reader.get_annotations(classes=['car'])

    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')

    tracker = Sort()
    tracks = defaultdict(list)

    y_true = []
    y_pred = []
    acc = MOTAcumulator()
    for frame in dets.keys():
        detections = dets.get(frame, [])

        # SORT takes an (N, 5) array [x1, y1, x2, y2, score] and returns the
        # tracked boxes with the track id appended in the last column
        new_detections = tracker.update(np.array([[*d.bbox, d.score] for d in detections]))
        new_detections = [Detection(frame, int(d[-1]), 'car', *d[:4]) for d in new_detections]

        y_true.append(gt.get(frame, []))
        y_pred.append(new_detections)
        acc.update(y_true[-1], y_pred[-1])

        if debug:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, img = cap.read()
            for d in new_detections:
                tracks[d.id].append(d.bbox)
                np.random.seed(d.id)  # fixed seed per id gives a stable color per track
                color = tuple(np.random.randint(0, 256, 3).tolist())
                for dd in tracks[d.id]:
                    cv2.circle(img, (int((dd[0] + dd[2]) / 2), int((dd[1] + dd[3]) / 2)), 5, color, -1)
            cv2.imshow('image', cv2.resize(img, (900, 600)))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cap.release()
    cv2.destroyAllWindows()

    ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
    idf1, idp, idr = acc.get_idf1()
    print(f'AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, '
          f'IDF1: {idf1:.4f}, IDP: {idp:.4f}, IDR: {idr:.4f}')

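# SORT models each box with a constant-velocity Kalman filter over the
# observation z = [cx, cy, s, r] (centre, scale = area, aspect ratio); the
# conversions below mirror the ones in the reference SORT implementation and
# show what tracker.update() works with internally:
def convert_bbox_to_z(bbox):
    w = bbox[2] - bbox[0]
    h = bbox[3] - bbox[1]
    return np.array([bbox[0] + w / 2, bbox[1] + h / 2, w * h, w / float(h)]).reshape((4, 1))


def convert_x_to_bbox(x):
    w = np.sqrt(x[2] * x[3])
    h = x[2] / w
    return np.array([x[0] - w / 2, x[1] - h / 2, x[0] + w / 2, x[1] + h / 2]).reshape((1, 4))
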
def task1_2():
    reader = AICityChallengeAnnotationReader(path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])

    for detector in ['mask_rcnn', 'ssd512', 'yolo3']:
        reader = AICityChallengeAnnotationReader(path=f'data/AICity_data/train/S03/c010/det/det_{detector}.txt')
        det = reader.get_annotations(classes=['car'])

        y_true = []
        y_pred = []
        for frame in gt.keys():
            y_true.append(gt.get(frame))
            y_pred.append(det.get(frame, []))

        map_value, _, _ = mean_average_precision(y_true, y_pred)
        print(f'{detector} mAP: {map_value:.4f}')

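# mean_average_precision is used throughout; for reference, a minimal sketch of
# the two building blocks such a VOC-style computation rests on (the greedy
# matching loop is omitted; `iou` is also reused by a later sketch and neither
# helper is the repo's actual API):
def iou(box_a, box_b):
    """IoU of two (xtl, ytl, xbr, ybr) boxes."""
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter + 1e-9)


def voc_ap_sketch(recall, precision):
    """11-point interpolated AP (Pascal VOC 2007 style) from sorted PR arrays."""
    recall, precision = np.asarray(recall), np.asarray(precision)
    return np.mean([max(precision[recall >= t], default=0) for t in np.linspace(0, 1, 11)])
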
def task4(adaptive, random_search, color_space, channels, model_frac=0.25,
          save_path=None, min_width=120, max_width=800, min_height=100,
          max_height=600, debug=0):
    """Color modelling."""

    n_ch = len(channels)

    # Read information
    reader = AICityChallengeAnnotationReader(path='data/AICity_data/train/S03/c010/gt/gt.txt')
    gt = reader.get_annotations(classes=['car'], only_not_parked=True)
    roi = cv2.imread('data/AICity_data/train/S03/c010/roi.jpg', cv2.IMREAD_GRAYSCALE)

    # Model background
    bg_model = SingleGaussianBackgroundModel(video_path='data/AICity_data/train/S03/c010/vdo.avi',
                                             color_space=color_space,
                                             channels=channels,
                                             resize=None)
    video_length = bg_model.length
    bg_model.fit(start=0, length=int(video_length * model_frac))

    # Video length
    start_frame = int(video_length * model_frac)
    end_frame = int(video_length)

    # Hyperparameter search
    if random_search:
        alphas = np.random.choice(np.linspace(2, 4, 50), 25)
        # pair every sampled alpha with rho=0 in the non-adaptive case
        rhos = np.random.choice(np.linspace(0.001, 0.1, 50), 25) if adaptive else [0] * len(alphas)
        combinations = list(zip(alphas, rhos))
    else:
        alphas = [3.5]
        rhos = [0.005] if adaptive else [0]
        combinations = [(alpha, rho) for alpha in alphas for rho in rhos]

    for alpha, rho in combinations:
        y_true = []
        y_pred = []

        if save_path:
            gif_name = (f'100_task3_alpha_{alpha}_rho_{rho}_color_{color_space}'
                        f'_channels_{n_ch}_{time.time()}.gif')
            writer = imageio.get_writer(os.path.join(save_path, gif_name), fps=10)

        for frame in trange(start_frame, end_frame,
                            desc=f'obtaining foreground and detecting objects. Alpha {alpha} Rho {rho}'):
            if frame == 635:
                break  # early stop after frame 635

            frame_img, mask, _ = bg_model.evaluate(frame=frame, alpha=alpha)
            mask = mask & roi
            non_post_mask = mask
            mask = postprocess(mask)

            detections = bounding_boxes(mask, min_height, max_height, min_width, max_width, frame)
            annotations = gt.get(frame, [])

            if save_path or debug >= 1:
                img = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
                for det in detections:
                    cv2.rectangle(img, (det.xtl, det.ytl), (det.xbr, det.ybr), (0, 255, 0), 3)
                for det in annotations:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ybr)), (0, 0, 255), 2)
                if save_path:
                    writer.append_data(img)
                if debug >= 1:
                    shape = (480, 270)
                    cv2.imshow('BGR Image', cv2.resize(img, shape))
                    cv2.imshow(f'Segmentation using {color_space}', cv2.resize(non_post_mask, shape))
                    cv2.imshow(f'Segmentation Morphed using {color_space}', cv2.resize(mask, shape))
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

            y_pred.append(detections)
            y_true.append(annotations)

        cv2.destroyAllWindows()
        if save_path:
            writer.close()

        ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
        print(f'alpha: {alpha:.1f}, rho: {rho:.3f}, AP: {ap:.4f}')
        print(f'prec: {prec:.4f}, recall: {rec:.4f}')

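# `postprocess` is a repo helper; a minimal sketch of what such a cleanup step
# typically does here (assumption: morphological opening to remove speckle
# noise, then closing to fill holes inside the vehicle blobs):
def postprocess_sketch(mask, open_size=3, close_size=7):
    open_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (open_size, open_size))
    close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (close_size, close_size))
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, open_kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, close_kernel)
    return mask
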
def task1_2(adaptive, random_search, model_frac=0.25, min_width=120,
            max_width=800, min_height=100, max_height=600, debug=0,
            save_path=None):
    reader = AICityChallengeAnnotationReader(path='data/AICity_data/train/S03/c010/gt/gt.txt')
    gt = reader.get_annotations(classes=['car'], only_not_parked=True)
    roi = cv2.imread('data/AICity_data/train/S03/c010/roi.jpg', cv2.IMREAD_GRAYSCALE)

    bg_model = SingleGaussianBackgroundModel(video_path='data/AICity_data/train/S03/c010/vdo.avi')
    video_length = bg_model.length
    bg_model.fit(start=0, length=int(video_length * model_frac))

    start_frame = int(video_length * model_frac)
    end_frame = int(video_length)

    # hyperparameter search
    if random_search:
        alphas = np.random.choice(np.linspace(2, 4, 50), 25)
        # pair every sampled alpha with rho=0 in the non-adaptive case
        rhos = np.random.choice(np.linspace(0.001, 0.1, 50), 25) if adaptive else [0] * len(alphas)
        combinations = list(zip(alphas, rhos))
    else:
        alphas = [2, 2.5, 3, 3.5, 4]
        rhos = [0.005, 0.01, 0.025, 0.05, 0.1] if adaptive else [0]
        combinations = [(alpha, rho) for alpha in alphas for rho in rhos]

    for alpha, rho in combinations:
        if save_path:
            writer = imageio.get_writer(
                os.path.join(save_path, f'task1_2_alpha{alpha:.1f}_rho{rho:.3f}.gif'), fps=10)

        y_true = []
        y_pred = []
        for frame in trange(start_frame, end_frame, desc='evaluating frames'):
            _, mask, _ = bg_model.evaluate(frame=frame, alpha=alpha, rho=rho)
            mask = mask & roi
            mask = postprocess(mask)

            detections = bounding_boxes(mask, min_height, max_height, min_width, max_width, frame)
            annotations = gt.get(frame, [])

            if debug >= 1 or save_path:
                img = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
                for det in detections:
                    cv2.rectangle(img, (det.xtl, det.ytl), (det.xbr, det.ybr), (0, 255, 0), 2)
                for det in annotations:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ybr)), (0, 0, 255), 2)
                if save_path:
                    writer.append_data(img)
                if debug >= 1:
                    cv2.imshow('result', img)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

            y_pred.append(detections)
            y_true.append(annotations)

        cv2.destroyAllWindows()
        if save_path:
            writer.close()

        ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
        print(f'alpha: {alpha:.1f}, rho: {rho:.3f}, AP: {ap:.4f}')

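# `bounding_boxes` is also a repo helper; a plausible sketch (assumption:
# external contours of the binary mask, filtered by the size limits) that
# returns Detection objects shaped like the ones used above:
def bounding_boxes_sketch(mask, min_height, max_height, min_width, max_width, frame):
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    detections = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if min_width < w < max_width and min_height < h < max_height:
            detections.append(Detection(frame=frame, id=None, label='car',
                                        xtl=x, ytl=y, xbr=x + w, ybr=y + h, score=1.0))
    return detections
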
def task3(methods, model_frac=0.25, min_width=120, max_width=800,
          min_height=100, max_height=600, save_path=None, debug=0):
    """Comparison with the state of the art."""

    reader = AICityChallengeAnnotationReader(path='data/AICity_data/train/S03/c010/gt/gt.txt')
    gt = reader.get_annotations(classes=['car'], only_not_parked=True)
    roi = cv2.imread('data/AICity_data/train/S03/c010/roi.jpg', cv2.IMREAD_GRAYSCALE)

    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')
    video_length = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    start_frame = int(video_length * model_frac)
    end_frame = int(video_length)

    for method in methods:
        backSub = sota_bg_subtractor(method)
        # rewind so every method models the background from the same frames
        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        for _ in trange(start_frame, desc='modelling background'):
            ret, img = cap.read()
            backSub.apply(img)

        if save_path:
            writer = imageio.get_writer(os.path.join(save_path, f'task3_method_{method}.gif'), fps=10)

        y_pred = []
        y_true = []
        for frame in trange(start_frame, end_frame, desc='evaluating frames'):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, img = cap.read()
            mask = backSub.apply(img)
            mask = mask & roi
            mask = postprocess(mask)

            detections = bounding_boxes(mask, min_height, max_height, min_width, max_width, frame)
            annotations = gt.get(frame, [])

            if debug >= 1 or save_path:
                img = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
                for det in detections:
                    cv2.rectangle(img, (det.xtl, det.ytl), (det.xbr, det.ybr), (0, 255, 0), 2)
                for det in annotations:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ybr)), (0, 0, 255), 2)
                if save_path:
                    writer.append_data(img)
                elif debug >= 1:
                    cv2.imshow('result', img)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

            y_pred.append(detections)
            y_true.append(annotations)

        cv2.destroyAllWindows()
        if save_path:
            writer.close()

        ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
        print(f'Method: {method}, AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')

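# `sota_bg_subtractor` maps a method name onto an OpenCV background subtractor;
# a minimal sketch (assumption: the usual MOG2 / KNN / LSBP choices, the last
# of which lives in opencv-contrib's cv2.bgsegm module):
def sota_bg_subtractor_sketch(method):
    if method == 'MOG2':
        return cv2.createBackgroundSubtractorMOG2()
    if method == 'KNN':
        return cv2.createBackgroundSubtractorKNN()
    if method == 'LSBP':
        return cv2.bgsegm.createBackgroundSubtractorLSBP()
    raise ValueError(f'unknown method {method}')
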
def task1_1(architecture, start=0, length=None, save_path='results/week3',
            gpu=0, visualize=False, save_detection='detection_results/'):
    """Object detection: off-the-shelf."""

    tensor = transforms.ToTensor()
    if architecture.lower() == 'fasterrcnn':
        model = detection.fasterrcnn_resnet50_fpn(pretrained=True)
    elif architecture.lower() == 'maskrcnn':
        model = detection.maskrcnn_resnet50_fpn(pretrained=True)
    else:
        raise ValueError(architecture)
    save_path = os.path.join(save_path, architecture)

    # Read video and prepare ground truth
    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')
    if not length:
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    reader = AICityChallengeAnnotationReader(path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    gt = {frame: gt[frame] for frame in range(start, start + length)}

    # Start inference (CUDA_VISIBLE_DEVICES must be set before the device is selected)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()

    detections = {}
    y_true, y_pred = [], []
    if save_detection:
        path = os.path.join(save_detection, architecture)
        os.makedirs(path, exist_ok=True)
        detection_file = open(f'{path}/{architecture.lower()}.txt', 'w')

    with torch.no_grad():
        for frame in range(start, start + length):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, img = cap.read()

            # Transform input to tensor
            print(f'Predict: {frame}')
            start_t = time.time()
            x = [tensor(img).to(device)]
            preds = model(x)[0]
            print(f'Inference time per frame: {round(time.time() - start_t, 2)}')

            # filter car predictions (COCO label 3) and apply NMS
            joint_preds = list(zip(preds['labels'], preds['boxes'], preds['scores']))
            car_det = list(filter(lambda x: x[0] == 3, joint_preds))
            # car_det = list(filter(lambda x: x[2] > 0.70, car_det))
            car_det = get_nms(car_det, 0.7)

            # add detections
            detections[frame] = []
            for det in car_det:
                det_obj = Detection(frame=frame,
                                    id=None,
                                    label='car',
                                    xtl=float(det[1][0]),
                                    ytl=float(det[1][1]),
                                    xbr=float(det[1][2]),
                                    ybr=float(det[1][3]),
                                    score=det[2])
                detections[frame].append(det_obj)
                if save_detection:
                    detection_file.write(f'{frame},-1,{det_obj.xtl},{det_obj.ytl},'
                                         f'{det_obj.width},{det_obj.height},{det_obj.score},-1,-1,-1\n')

            y_pred.append(detections[frame])
            y_true.append(gt.get(frame, []))

    ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
    print(f'Network: {architecture}, AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')

    if visualize:
        print(f'Saving result to {save_path}')
        os.makedirs(save_path, exist_ok=True)
        video_iou_plot(gt, detections,
                       video_path='data/AICity_data/train/S03/c010/vdo.avi',
                       title=f'{architecture} detections',
                       save_path=save_path)
    cv2.destroyAllWindows()

    if save_detection:
        detection_file.close()

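# `get_nms` is a repo helper; since the detections at this point are
# (label, box, score) tuples of tensors, a sketch built on torchvision.ops.nms
# (boxes as an (N, 4) tensor, scores as an (N,) tensor, returning the indices
# to keep) could look like this; get_nms_sketch is illustrative, not the repo's API.
from torchvision.ops import nms

def get_nms_sketch(dets, iou_threshold):
    if not dets:
        return []
    boxes = torch.stack([d[1] for d in dets])
    scores = torch.stack([d[2] for d in dets])
    keep = nms(boxes, scores, iou_threshold)
    return [dets[i] for i in keep]
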
def task2_1(debug=False, save_path=None,
            det_path='data/AICity_data/train/S03/c010/det/det_mask_rcnn.txt'):
    """Object tracking: tracking by overlap."""

    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')

    reader = AICityChallengeAnnotationReader(path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(path=det_path)
    dets = reader.get_annotations(classes=['car'])

    if save_path:
        writer = imageio.get_writer(os.path.join(save_path, 'task21.gif'), fps=10)

    accumulator = MOTAcumulator()
    y_true = []
    y_pred = []
    y_pred_refined = []
    tracks = []
    max_track = 0
    for frame in dets.keys():
        if debug or save_path:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, img = cap.read()

        detections_on_frame = dets.get(frame, [])
        tracks, frame_tracks, max_track = update_tracks_by_overlap(tracks, detections_on_frame, max_track)

        frame_detections = []
        for track in frame_tracks:
            det = track.last_detection()
            frame_detections.append(det)
            if debug or save_path:
                cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ybr)), track.color, 2)
                cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ytl) - 15), track.color, -2)
                cv2.putText(img, str(det.id), (int(det.xtl), int(det.ytl)),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)
                for dd in track.detections:
                    cv2.circle(img, dd.center, 5, track.color, -1)

        y_pred_refined.append(frame_detections)
        y_pred.append(detections_on_frame)
        y_true.append(gt.get(frame, []))
        accumulator.update(y_true[-1], y_pred_refined[-1])

        if save_path:
            writer.append_data(cv2.resize(img, (600, 350)))
        elif debug:
            cv2.imshow('result', cv2.resize(img, (900, 600)))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cv2.destroyAllWindows()
    if save_path:
        writer.close()

    ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'], sort_method='score')
    print(f'Original AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')
    ap, prec, rec = mean_average_precision(y_true, y_pred_refined, classes=['car'], sort_method='score')
    print(f'After refinement AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')
    print('\nAdditional metrics:')
    print(accumulator.get_idf1())

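# `update_tracks_by_overlap` is the heart of tracking by overlap; a minimal
# sketch of the greedy IoU association such a function performs (assumptions:
# the hypothetical TrackSketch class below, an IoU threshold of 0.5, and the
# `iou` helper sketched after the week-1 task1_2; not the repo's implementation):
class TrackSketch:
    def __init__(self, id, detections):
        self.id = id
        self.detections = detections
        self.color = tuple(np.random.randint(0, 256, 3).tolist())

    def last_detection(self):
        return self.detections[-1]


def update_tracks_by_overlap_sketch(tracks, detections, max_track, iou_threshold=0.5):
    frame_tracks = []
    for det in detections:
        # match the detection to the track whose last box overlaps it the most
        best_iou, best_track = 0, None
        for track in tracks:
            overlap = iou(track.last_detection().bbox, det.bbox)
            if overlap > best_iou:
                best_iou, best_track = overlap, track
        if best_track is not None and best_iou >= iou_threshold:
            det.id = best_track.id
            best_track.detections.append(det)
        else:
            # no sufficient overlap: start a new track
            max_track += 1
            det.id = max_track
            best_track = TrackSketch(max_track, [det])
            tracks.append(best_track)
        frame_tracks.append(best_track)
    return tracks, frame_tracks, max_track
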
def launch_test_kalman_filter(save_path, distance_thresholds, min_track_len,
                              min_width, min_height, sequence, camera, detector):
    save_video = False
    save_summary = False
    fps = 24

    os.makedirs(save_path, exist_ok=True)

    reader = AICityChallengeAnnotationReader(path=f'data/AICity_data/train/{sequence}/{camera}/gt/gt.txt')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(path=f'data/AICity_data/train/{sequence}/{camera}/det/det_{detector}.txt')
    dets = reader.get_annotations(classes=['car'])
    cap = cv2.VideoCapture(f'data/AICity_data/train/{sequence}/{camera}/vdo.avi')
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if save_video:
        writer = imageio.get_writer(
            os.path.join(save_path, f'task1_{sequence}_{camera}_{detector}.gif'), fps=fps)

    tracker = Sort()
    y_true = []
    tracks = []
    max_track = 0
    video_percentage = 1
    start = 0
    end = int(n_frames * video_percentage)
    for frame in trange(start, end, desc='Tracking'):
        detections_on_frame_ = dets.get(frame, [])
        detections_on_frame = []
        for d in detections_on_frame_:
            # keep only detections above the minimum size
            # (width is xbr - xtl, height is ybr - ytl)
            if min_width < (d.xbr - d.xtl) and min_height < (d.ybr - d.ytl):
                detections_on_frame.append(d)

        detections_on_frame = tracker.update(np.array([[*d.bbox, d.score] for d in detections_on_frame]))
        detections_on_frame = [Detection(frame, int(d[-1]), 'car', *d[:4]) for d in detections_on_frame]

        tracks, frame_tracks, max_track = update_tracks_by_overlap(tracks, detections_on_frame, max_track,
                                                                   refinement=False, optical_flow=None)

        y_true.append(gt.get(frame, []))

    idf1s = []
    for distance_threshold in distance_thresholds:
        accumulator = MOTAcumulator()
        y_pred = []

        moving_tracks = remove_static_tracks(tracks, distance_threshold, min_track_len)
        detections = []
        # map each detection id to its track's color for drawing
        colors = {}
        for track in moving_tracks:
            detections.extend(track.detections)
            for det in track.detections:
                colors[det.id] = track.color
        detections = group_by_frame(detections)

        for frame in trange(start, end, desc='Accumulating detections'):
            if save_video:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
                ret, img = cap.read()
                for det in y_true[frame]:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ybr)), (0, 255, 0), 6)

            frame_detections = []
            for det in detections.get(frame, []):
                frame_detections.append(det)
                if save_video:
                    color = colors[det.id]
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ybr)), color, 6)
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ytl) - 15), color, -6)
                    cv2.putText(img, str(det.id), (int(det.xtl), int(det.ytl)),
                                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 6)
                    cv2.circle(img, det.center, 5, color, -1)

            y_pred.append(frame_detections)
            if save_video:
                writer.append_data(cv2.resize(img, (600, 350)))
            accumulator.update(y_true[frame], y_pred[-1])

        ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'], sort_method=None)
        print(f'AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')
        print('Additional metrics:')
        summary = accumulator.get_idf1()
        # As mentioned in https://github.com/cheind/py-motmetrics:
        # FAR = FalsePos / Frames * 100
        # MOTP = (1 - MOTP) * 100
        print(summary)
        if save_summary:
            summary_name = f'task1_{sequence}_{camera}_{detector}_{distance_threshold}.txt'
            with open(os.path.join(save_path, summary_name), 'w') as f:
                f.write(str(summary))

        idf1s.append(summary['idf1']['acc'] * 100)

    cv2.destroyAllWindows()
    if save_video:
        writer.close()

    return idf1s

def launch_test_optical_flow(save_path, distance_thresholds, min_track_len,
                             min_width, min_height, sequence, camera, detector):
    save_video = False
    save_summary = False
    fps = 24

    os.makedirs(save_path, exist_ok=True)

    reader = AICityChallengeAnnotationReader(path=f'data/AICity_data/train/{sequence}/{camera}/gt/gt.txt')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(path=f'data/AICity_data/train/{sequence}/{camera}/det/det_{detector}.txt')
    dets = reader.get_annotations(classes=['car'])
    cap = cv2.VideoCapture(f'data/AICity_data/train/{sequence}/{camera}/vdo.avi')
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if save_video:
        writer = imageio.get_writer(
            os.path.join(save_path, f'task1_{sequence}_{camera}_{detector}.gif'), fps=fps)

    y_true = []
    tracks = []
    max_track = 0
    previous_frame = None
    video_percentage = 1
    start = 0
    end = int(n_frames * video_percentage)
    for frame in trange(start, end, desc='Tracking'):
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
        ret, img = cap.read()

        detections_on_frame_ = dets.get(frame, [])
        detections_on_frame = []
        for d in detections_on_frame_:
            # keep only detections above the minimum size
            # (width is xbr - xtl, height is ybr - ytl)
            if min_width < (d.xbr - d.xtl) and min_height < (d.ybr - d.ytl):
                detections_on_frame.append(d)

        if frame == 0 or not detections_on_frame:
            optical_flow = None
        else:
            height, width = previous_frame.shape[:2]

            # get points on which to detect the flow (both corners of every box)
            points = []
            for det in detections_on_frame:
                points.append([det.xtl, det.ytl])
                points.append([det.xbr, det.ybr])
            p0 = np.array(points, dtype=np.float32)

            # params for Lucas-Kanade optical flow
            lk_params = dict(winSize=(15, 15),
                             maxLevel=2,
                             criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
            p1, st, err = cv2.calcOpticalFlowPyrLK(previous_frame, img, p0, None, **lk_params)
            p0 = p0.reshape((len(detections_on_frame) * 2, 2))
            p1 = p1.reshape((len(detections_on_frame) * 2, 2))
            st = st.reshape(len(detections_on_frame) * 2)

            # flow field computed by subtracting prev points from next points
            flow = p1 - p0
            flow[st == 0] = 0

            # store the sparse flow in a dense map at the two box corners
            optical_flow = np.zeros((height, width, 2), dtype=np.float32)
            for jj, det in enumerate(detections_on_frame):
                optical_flow[int(det.ytl), int(det.xtl)] = flow[2 * jj]
                optical_flow[int(det.ybr), int(det.xbr)] = flow[2 * jj + 1]

        previous_frame = img.copy()

        tracks, frame_tracks, max_track = update_tracks_by_overlap(tracks, detections_on_frame, max_track,
                                                                   refinement=False, optical_flow=optical_flow)

        y_true.append(gt.get(frame, []))

    idf1s = []
    for distance_threshold in distance_thresholds:
        accumulator = MOTAcumulator()
        y_pred = []

        moving_tracks = remove_static_tracks(tracks, distance_threshold, min_track_len)
        detections = []
        # map each detection id to its track's color for drawing
        colors = {}
        for track in moving_tracks:
            detections.extend(track.detections)
            for det in track.detections:
                colors[det.id] = track.color
        detections = group_by_frame(detections)

        for frame in trange(start, end, desc='Accumulating detections'):
            if save_video:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
                ret, img = cap.read()
                for det in y_true[frame]:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ybr)), (0, 255, 0), 6)

            frame_detections = []
            for det in detections.get(frame, []):
                frame_detections.append(det)
                if save_video:
                    color = colors[det.id]
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ybr)), color, 6)
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ytl) - 15), color, -6)
                    cv2.putText(img, str(det.id), (int(det.xtl), int(det.ytl)),
                                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 6)
                    cv2.circle(img, det.center, 5, color, -1)

            y_pred.append(frame_detections)
            if save_video:
                writer.append_data(cv2.resize(img, (600, 350)))
            accumulator.update(y_true[frame], y_pred[-1])

        ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'], sort_method='score')
        print(f'AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')
        print('Additional metrics:')
        summary = accumulator.get_idf1()
        # As mentioned in https://github.com/cheind/py-motmetrics:
        # FAR = FalsePos / Frames * 100
        # MOTP = (1 - MOTP) * 100
        print(summary)
        if save_summary:
            summary_name = f'task1_{sequence}_{camera}_{detector}_{distance_threshold}.txt'
            with open(os.path.join(save_path, summary_name), 'w') as f:
                f.write(str(summary))

        idf1s.append(summary['idf1']['acc'] * 100)

    cv2.destroyAllWindows()
    if save_video:
        writer.close()

    return idf1s

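# How `update_tracks_by_overlap` might consume the sparse flow stored above
# (assumption for illustration): sample the flow at the two corners where it
# was written and shift the previous box before IoU matching.
def shift_bbox_by_flow_sketch(det, optical_flow):
    dx1, dy1 = optical_flow[int(det.ytl), int(det.xtl)]
    dx2, dy2 = optical_flow[int(det.ybr), int(det.xbr)]
    return (det.xtl + dx1, det.ytl + dy1, det.xbr + dx2, det.ybr + dy2)
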
def task3_1(video_percentage=1):
    # Tracking with optical flow
    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    save_path = 'results/week4/task_31'
    os.makedirs(save_path, exist_ok=True)

    reader = AICityChallengeAnnotationReader(path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(path='data/AICity_data/train/S03/c010/det/det_mask_rcnn.txt')
    dets = reader.get_annotations(classes=['car'])

    if save_path:
        writer = imageio.get_writer(os.path.join(save_path, 'task31.gif'), fps=fps)

    accumulator = MOTAcumulator()
    y_true = []
    y_pred = []
    y_pred_refined = []
    tracks = []
    max_track = 0
    previous_frame = None
    detections_on_frame = []
    end = int(n_frames * video_percentage)
    for i, frame in tqdm(enumerate(dets.keys())):
        if i == end:
            break

        # the frame is needed both for the optical flow and for visualization
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
        ret, img = cap.read()

        # the flow is computed at the previous frame's detections:
        # `detections_on_frame` is only reassigned further down the loop
        if i == 0 or not detections_on_frame:
            optical_flow = None
        else:
            height, width = previous_frame.shape[:2]

            # get points on which to detect the flow (both corners of every box)
            points = []
            for det in detections_on_frame:
                points.append([det.xtl, det.ytl])
                points.append([det.xbr, det.ybr])
            p0 = np.array(points, dtype=np.float32)

            # params for Lucas-Kanade optical flow
            lk_params = dict(winSize=(15, 15),
                             maxLevel=2,
                             criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
            p1, st, err = cv2.calcOpticalFlowPyrLK(previous_frame, img, p0, None, **lk_params)
            p0 = p0.reshape((len(detections_on_frame) * 2, 2))
            p1 = p1.reshape((len(detections_on_frame) * 2, 2))
            st = st.reshape(len(detections_on_frame) * 2)

            # flow field computed by subtracting prev points from next points
            flow = p1 - p0
            flow[st == 0] = 0

            optical_flow = np.zeros((height, width, 2), dtype=np.float32)
            for jj, det in enumerate(detections_on_frame):
                optical_flow[int(det.ytl), int(det.xtl)] = flow[2 * jj]
                optical_flow[int(det.ybr), int(det.xbr)] = flow[2 * jj + 1]

        previous_frame = img.copy()

        detections_on_frame = dets.get(frame, [])
        tracks, frame_tracks, max_track = update_tracks_by_overlap(tracks, detections_on_frame, max_track,
                                                                   refinement=False, optical_flow=optical_flow)

        frame_detections = []
        for track in frame_tracks:
            det = track.last_detection()
            frame_detections.append(det)
            if save_path:
                cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ybr)), track.color, 2)
                cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ytl) - 15), track.color, -2)
                cv2.putText(img, str(det.id), (int(det.xtl), int(det.ytl)),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)
                for dd in track.detections:
                    cv2.circle(img, dd.center, 5, track.color, -1)

        y_pred_refined.append(frame_detections)
        y_pred.append(detections_on_frame)
        y_true.append(gt.get(frame, []))
        accumulator.update(y_true[-1], y_pred_refined[-1])

        if save_path:
            writer.append_data(cv2.resize(img, (600, 350)))

    cv2.destroyAllWindows()
    if save_path:
        writer.close()

    ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'], sort_method='score')
    print(f'Original AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')
    ap, prec, rec = mean_average_precision(y_true, y_pred_refined, classes=['car'], sort_method='score')
    print(f'After refinement AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')
    print('\nAdditional metrics:')
    print(accumulator.get_idf1())

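# A minimal sketch of how these runners might be invoked; the argument values
# below are illustrative assumptions, not the settings used in the experiments.
if __name__ == '__main__':
    # task1_1(save_path='results/week1')
    # task2_1(debug=True)
    task3_1(video_percentage=0.1)
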