def compute_histogram_gt(gt_detections, video_path):
    """Attach an RGB histogram to each ground-truth detection, grouped by track.

    The video is decoded frame by frame. For every detection whose ``frame``
    equals the current frame index, the image patch under its box is passed to
    ``rgb_histogram`` and a new ``Detection`` carrying that histogram is added
    to the ``Track`` whose ``id`` equals the detection's ``track_id`` (a new
    ``Track`` is created the first time an id is seen).

    Parameters
    ----------
    gt_detections : iterable of Detection
        Ground-truth detections. The attributes ``frame``, ``label``, ``xtl``,
        ``ytl``, ``width``, ``height``, ``confidence`` and ``track_id`` are
        read. The box fields are used directly as slice bounds on the frame.
    video_path : str
        Path handed to ``cv2.VideoCapture``.

    Returns
    -------
    list of Track
        Tracks in order of first appearance. Detections that refer to a frame
        index the video never reaches are not included.
    """
    from collections import defaultdict

    # Index the detections once instead of rescanning the full list per frame.
    detections_by_frame = defaultdict(list)
    for detection in gt_detections:
        detections_by_frame[detection.frame].append(detection)

    capture = cv2.VideoCapture(video_path)
    tracks_gt_with_hist = []
    n_frame = 0
    try:
        while capture.isOpened():
            valid, image = capture.read()
            if not valid:
                break

            for detection in detections_by_frame.get(n_frame, []):
                patch = image[detection.ytl:(detection.ytl + detection.height),
                              detection.xtl:(detection.xtl + detection.width)]
                with_histogram = Detection(detection.frame,
                                           detection.label,
                                           detection.xtl,
                                           detection.ytl,
                                           detection.width,
                                           detection.height,
                                           detection.confidence,
                                           track_id=detection.track_id,
                                           histogram=rgb_histogram(patch))

                track = next((t for t in tracks_gt_with_hist
                              if t.id == detection.track_id), None)
                if track is None:
                    tracks_gt_with_hist.append(
                        Track(detection.track_id, [with_histogram]))
                else:
                    track.detections.append(with_histogram)

            n_frame += 1
    finally:
        # Always hand the video handle back, even if a frame fails to process.
        capture.release()

    return tracks_gt_with_hist
def transform_detection(det_0, homography1, homography2):
    """Predict where a detection's box lands after applying two homographies.

    Only the two bottom corners of ``det_0.bbox`` are projected with
    ``apply_homography_to_point``. The projected width is the horizontal
    distance between them, and a height is derived from it using the original
    height/width ratio of the box.

    Parameters
    ----------
    det_0 : Detection
        Source detection; ``bbox`` is unpacked as (min col, min row, max col,
        max row). ``frame``, ``label`` and ``histogram`` are copied over.
    homography1, homography2 : array-like
        Converted with ``np.array`` and forwarded unchanged to
        ``apply_homography_to_point``.
        NOTE(review): presumably the source-view and target-view homographies
        -- confirm against ``apply_homography_to_point``.

    Returns
    -------
    Detection
        New detection built from the predicted box. No confidence or track id
        is passed to the constructor.
    """
    left, top, right, bottom = det_0.bbox
    h_first = np.array(homography1)
    h_second = np.array(homography2)

    # Project the bottom-left and bottom-right corners of the box.
    bottom_left = apply_homography_to_point(left, bottom, h_first, h_second)
    bottom_right = apply_homography_to_point(right, bottom, h_first, h_second)

    aspect = (bottom - top) / (right - left)  # height / width of the source box
    new_width = abs(bottom_left[0] - bottom_right[0])
    new_height = aspect * new_width

    x_min = min(bottom_left[0], bottom_right[0])
    x_max = max(bottom_left[0], bottom_right[0])
    # The top edge sits ``new_height`` above the higher projected corner, while
    # the bottom edge is the lower projected corner.
    y_min = min(bottom_left[1], bottom_right[1]) - new_height
    y_max = max(bottom_left[1], bottom_right[1])

    return Detection(det_0.frame,
                     det_0.label,
                     x_min,
                     y_min,
                     x_max - x_min,
                     y_max - y_min,
                     histogram=det_0.histogram)
def filtering_parked(detections, video_path):
    """Drop detections that look stationary ("parked") between two frames.

    A detection on frame ``n`` is treated as parked when at least one
    detection on frame ``n + PIXELS_SHIFT`` has all four box coordinates
    within ``WINDOW_FRAME`` of its own. Every other detection is rebuilt as a
    fresh ``Detection`` (frame, label, box and confidence only) and returned.

    NOTE(review): ``PIXELS_SHIFT`` is used as a frame offset and
    ``WINDOW_FRAME`` as a pixel tolerance; the names look swapped -- confirm
    against where the constants are defined.

    Parameters
    ----------
    detections : iterable of Detection
        Detections to filter; ``frame``, ``bbox`` (min col, min row, max col,
        max row), ``label`` and ``confidence`` are read.
    video_path : str
        Path handed to ``cv2.VideoCapture``. The video is only decoded to know
        how many frames to walk through; detections on frame indices the video
        never reaches are dropped.

    Returns
    -------
    list of Detection
        The detections that were not classified as parked.
    """
    from collections import defaultdict

    print("Removing parked cars")

    # Index the detections once instead of rescanning the full list per frame.
    detections_by_frame = defaultdict(list)
    for detection in detections:
        detections_by_frame[detection.frame].append(detection)

    capture = cv2.VideoCapture(video_path)
    final_det = []
    n_frame = 0
    try:
        while capture.isOpened():
            valid, _ = capture.read()
            if not valid:
                break

            later_boxes = [
                d.bbox
                for d in detections_by_frame.get(n_frame + PIXELS_SHIFT, [])
            ]

            for detection in detections_by_frame.get(n_frame, []):
                minc, minr, maxc, maxr = detection.bbox
                # All four edges must stay inside the tolerance window. The
                # last test previously compared ``n_maxr`` with itself, so the
                # bottom edge was never actually checked.
                is_parked = any(
                    minc - WINDOW_FRAME <= n_minc <= minc + WINDOW_FRAME
                    and minr - WINDOW_FRAME <= n_minr <= minr + WINDOW_FRAME
                    and maxc - WINDOW_FRAME <= n_maxc <= maxc + WINDOW_FRAME
                    and maxr - WINDOW_FRAME <= n_maxr <= maxr + WINDOW_FRAME
                    for n_minc, n_minr, n_maxc, n_maxr in later_boxes)
                if is_parked:
                    continue

                final_det.append(
                    Detection(detection.frame, detection.label, minc, minr,
                              maxc - minc, maxr - minr, detection.confidence))

            n_frame += 1
    finally:
        # Always hand the video handle back, even if filtering fails midway.
        capture.release()

    return final_det
def filtering_nms(detections, video_path, min_size=75):
    """Apply non-maximum suppression per frame and discard small boxes.

    For every frame of the video, the boxes of the detections on that frame
    are passed to ``non_max_suppression_fast``. A detection is kept when its
    position in that list is contained in the returned ``pick`` and both its
    width and height are at least ``min_size``. Kept detections are rebuilt as
    fresh ``Detection`` objects (frame, label, box and confidence only).

    Parameters
    ----------
    detections : iterable of Detection
        Detections to filter; ``frame``, ``bbox`` (min col, min row, max col,
        max row), ``label`` and ``confidence`` are read.
    video_path : str
        Path handed to ``cv2.VideoCapture``. The video is only decoded to know
        how many frames to walk through; detections on frame indices the video
        never reaches are dropped.
    min_size : int, optional
        Minimum width and height a box must have to be kept. Defaults to 75,
        the value that used to be hard-coded.

    Returns
    -------
    list of Detection
        The detections that survived suppression and the size filter.
    """
    from collections import defaultdict

    print("Applying non-maximum-supression")

    # Index the detections once instead of rescanning the full list per frame.
    detections_by_frame = defaultdict(list)
    for detection in detections:
        detections_by_frame[detection.frame].append(detection)

    capture = cv2.VideoCapture(video_path)
    final_det = []
    n_frame = 0
    try:
        while capture.isOpened():
            valid, _ = capture.read()
            if not valid:
                break

            detections_on_frame = detections_by_frame.get(n_frame, [])
            detections_bboxes = [d.bbox for d in detections_on_frame]
            pick = non_max_suppression_fast(detections_bboxes)

            for n, detection in enumerate(detections_on_frame):
                if n not in pick:
                    continue
                minc, minr, maxc, maxr = detections_bboxes[n]
                w = maxc - minc
                h = maxr - minr
                if w < min_size or h < min_size:
                    continue
                final_det.append(
                    Detection(detection.frame, detection.label, minc, minr,
                              w, h, detection.confidence))

            n_frame += 1
    finally:
        # Always hand the video handle back, even if filtering fails midway.
        capture.release()

    return final_det
def read_detections(path):
    """Read detections from a comma-separated text file.

    Each non-blank line is expected to look like
    ``frame, -1, left, top, width, height, conf, -1, -1, -1``. Only the frame
    index and the four box fields are used: the box values are truncated to
    integers, the label is always ``'car'`` and the confidence passed to
    ``Detection`` is always ``1`` (the ``conf`` column is ignored). The frame
    index is used exactly as stored in the file.

    Parameters
    ----------
    path : str
        Path of the detection file.

    Returns
    -------
    list of Detection
        One detection per non-blank line, in file order.
    """
    frame_detections = []
    with open(path) as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline) instead of crashing
            # on int('').
            if not line.strip():
                continue
            parts = line.split(',')
            frame_id = int(parts[0])
            tl_x = int(float(parts[2]))
            tl_y = int(float(parts[3]))
            width = int(float(parts[4]))
            height = int(float(parts[5]))
            frame_detections.append(
                Detection(frame_id, 'car', tl_x, tl_y, width, height, 1))
    return frame_detections
def create_detections(detection_mat, frame_idx, w_img=0, h_img=0):
    """Create detections for given frame index from the raw detection matrix.

    Parameters
    ----------
    detection_mat : ndarray
        Matrix of detections. The first 10 columns of the detection matrix are
        in the standard MOTChallenge detection format. In the remaining columns
        store the feature vector associated with each detection.
    frame_idx : int
        The frame index.
    w_img : Optional[int]
        Currently unused; kept for backward compatibility. The clipping of
        boxes to the image width that used it is disabled.
    h_img : Optional[int]
        Currently unused; kept for backward compatibility. The clipping of
        boxes to the image height that used it is disabled.

    Returns
    -------
    List[tracker.Detection]
        Returns detection responses at given frame index.

    Notes
    -----
    NOTE(review): ``Detection`` is called here with (bbox, confidence,
    feature); confirm that the ``Detection`` class in scope accepts that
    signature.
    """
    # ``np.int`` was only an alias of the builtin ``int`` and no longer exists
    # in current NumPy releases; the builtin gives the same dtype.
    frame_indices = detection_mat[:, 0].astype(int)
    mask = frame_indices == frame_idx

    detection_list = []
    for row in detection_mat[mask]:
        # Columns 2-5 hold the box, column 6 the confidence and everything
        # from column 10 onwards the feature vector.
        bbox, confidence, feature = row[2:6], row[6], row[10:]
        detection_list.append(Detection(bbox, confidence, feature))
    return detection_list
def track_objects_single(video_path, detections_list, gt_list, optical_flow=False,
                         of_track=TrackingOF, display=False, export_frames=False,
                         idf1=True, save_pkl=True, name_pkl=''):
    """Track objects through one video and optionally score the result.

    For every frame, the detections on that frame are fed to ``update_tracks``
    and ``obtain_new_tracks``. Each active track of the frame is turned into a
    ``Detection`` (label ``'car'``) carrying the track id and an RGB histogram
    of the image patch under its box.

    Parameters
    ----------
    video_path : str
        Path handed to ``cv2.VideoCapture``.
    detections_list : list of Detection
        Detections to track; selected per frame through their ``frame``.
    gt_list : list of Detection
        Ground truth; ``frame``, ``bbox`` and ``track_id`` are read. Only used
        when ``idf1`` is true.
    optical_flow : bool, optional
        When true, ``of_track.check_optical_flow`` is called once per frame
        with the detections accumulated so far and the frame index. Its
        results are collected but not returned.
    of_track : object, optional
        Object providing ``check_optical_flow``.
    display : bool, optional
        When true, every second frame among the first 200 is passed to
        ``visualize_tracks``.
    export_frames : bool, optional
        When true, every frame is passed to ``visualize_tracks_opencv`` with
        an export path under ``output_frames/tracking/``.
    idf1 : bool, optional
        When true, tracking metrics are accumulated with ``motmetrics``, then
        printed and appended to ``results/metrics.txt``.
    save_pkl : bool, optional
        When true, the returned values are pickled to
        ``detections<name_pkl>.pkl`` and ``tracks<name_pkl>.pkl``.
    name_pkl : str, optional
        Suffix inserted into the pickle file names.

    Returns
    -------
    tuple
        ``(new_detections, tracks)``: the per-frame tracked detections of the
        whole video and the final track list.
    """
    colors = np.random.rand(500, 3)  # used only for display
    tracks = []
    max_track = -1
    new_detections = []
    of_detections = []
    acc = mm.MOTAccumulator(auto_id=True) if idf1 else None

    capture = cv2.VideoCapture(video_path)
    # Size the progress bar from the video itself; fall back to an open-ended
    # bar when the backend cannot report a frame count.
    frame_total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    pbar = tqdm(total=frame_total if frame_total > 0 else None)
    n_frame = 0

    try:
        while capture.isOpened():
            valid, image = capture.read()
            if not valid:
                break

            frame_tracks = {}
            detections_on_frame = [x for x in detections_list if x.frame == n_frame]

            tracks, unused_detections, frame_tracks = update_tracks(
                image, tracks, detections_on_frame, frame_tracks)
            tracks, max_track, frame_tracks = obtain_new_tracks(
                tracks, unused_detections, max_track, frame_tracks)

            if display and n_frame % 2 == 0 and n_frame < 200:
                visualize_tracks(image, frame_tracks, colors, display=display)

            if export_frames:
                visualize_tracks_opencv(
                    image, frame_tracks, colors,
                    export_frames=export_frames,
                    export_path="output_frames/tracking/frame_{:04d}.png".format(n_frame))

            # Turn each active track into a Detection with its histogram.
            # ``bbox`` is (x1, y1, x2, y2).
            detec_bboxes = []
            detec_ids = []
            for key, value in frame_tracks.items():
                detec_ids.append(key)
                bbox = value['bbox']
                conf = value['confidence']
                detec_bboxes.append(bbox)
                patch = image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2]), :]
                new_detections.append(
                    Detection(n_frame, 'car', bbox[0], bbox[1],
                              bbox[2] - bbox[0], bbox[3] - bbox[1], conf,
                              track_id=key,
                              histogram=rgb_histogram(patch)))

            if optical_flow:
                of_detections.append(
                    of_track.check_optical_flow(new_detections, n_frame))

            if idf1:
                gt_on_frame = [x for x in gt_list if x.frame == n_frame]
                gt_ids = [gt.track_id for gt in gt_on_frame]
                # motmetrics' iou_matrix takes rectangles as (x, y, w, h) with
                # (x, y) the top-left corner. Centre coordinates were passed
                # before, which shifted every box by half its own size and
                # distorted the IoU between boxes of different sizes.
                mm_gt_bboxes = [
                    [gt.bbox[0], gt.bbox[1],
                     gt.bbox[2] - gt.bbox[0], gt.bbox[3] - gt.bbox[1]]
                    for gt in gt_on_frame
                ]
                mm_detec_bboxes = [
                    [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
                    for bbox in detec_bboxes
                ]
                distances_gt_det = mm.distances.iou_matrix(
                    mm_gt_bboxes, mm_detec_bboxes, max_iou=1.)
                acc.update(gt_ids, detec_ids, distances_gt_det)

            pbar.update(1)
            n_frame += 1
    finally:
        # Release the progress bar, the video and any display windows even
        # when a frame fails to process.
        pbar.close()
        capture.release()
        cv2.destroyAllWindows()

    if idf1:
        print(acc.mot_events)
        mh = mm.metrics.create()
        summary = mh.compute(acc, metrics=mm.metrics.motchallenge_metrics, name='acc')
        with open("results/metrics.txt", "a") as f:
            f.write(summary.to_string() + "\n")
        print(summary)

    if save_pkl:
        with open('detections' + name_pkl + '.pkl', 'wb') as f:
            pickle.dump(new_detections, f)
        with open('tracks' + name_pkl + '.pkl', 'wb') as f:
            pickle.dump(tracks, f)

    return new_detections, tracks
def read_annotations_from_xml(annotation_path, video_path):
    """Read non-occluded car annotations from an XML file, frame by frame.

    The XML root is expected to contain ``track`` elements (attributes ``id``
    and ``label``) with ``box`` children (attributes ``frame``, ``occluded``,
    ``xtl``, ``ytl``, ``xbr``, ``ybr``). The video is decoded only to know how
    many frames to walk through: boxes on frame indices the video never
    reaches are ignored.

    Only tracks labelled ``'car'`` are read, and boxes with
    ``occluded == '1'`` are skipped. Box coordinates are truncated to
    integers; width and height are computed as ``xbr - xtl + 1`` and
    ``ybr - ytl + 1``.

    NOTE(review): detections in the flat list use the box's frame index and
    carry the track id, while detections stored inside the tracks use
    ``frame + 1`` and carry no track id. This is preserved as found -- confirm
    whether the offset is intended.

    Arguments:
        annotation_path: path of the XML annotation file
        video_path: path of the video, handed to cv2.VideoCapture

    :returns: tuple (list of Detection, list of Track)
    """
    root = ET.parse(annotation_path).getroot()

    # Index every track's boxes by their ``frame`` attribute once, instead of
    # running an XPath search for each (frame, track) pair. ``setdefault``
    # keeps the first box per frame, which is what ``find`` would return.
    indexed_tracks = []
    for track in root.findall('track'):
        boxes_by_frame = {}
        for box in track.findall('box'):
            boxes_by_frame.setdefault(box.get('frame'), box)
        indexed_tracks.append(
            (track.attrib['id'], track.attrib['label'], boxes_by_frame))

    ground_truths = []
    tracks = []
    num = 0

    capture = cv2.VideoCapture(video_path)
    # Size the progress bar from the video itself; fall back to an open-ended
    # bar when the backend cannot report a frame count.
    frame_total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    pbar = tqdm(total=frame_total if frame_total > 0 else None)
    try:
        while capture.isOpened():
            # The decoded image itself is not needed; keeping every frame in
            # memory, as was done before, only exhausted RAM on long videos.
            valid, _ = capture.read()
            if not valid:
                break

            for gt_id, label, boxes_by_frame in indexed_tracks:
                box = boxes_by_frame.get(str(num))
                if box is None or label != 'car':  # Read cars only
                    continue
                if box.attrib['occluded'] == '1':  # Discard occluded
                    continue

                frame = int(box.attrib['frame'])
                xtl = int(float(box.attrib['xtl']))
                ytl = int(float(box.attrib['ytl']))
                xbr = int(float(box.attrib['xbr']))
                ybr = int(float(box.attrib['ybr']))
                width = xbr - xtl + 1
                height = ybr - ytl + 1

                ground_truths.append(
                    Detection(frame, label, xtl, ytl, width, height, 1, gt_id))

                track_detection = Detection(frame + 1, label, xtl, ytl,
                                            width, height, 1)
                existing = next((t for t in tracks if t.id == gt_id), None)
                if existing is None:
                    tracks.append(Track(gt_id, [track_detection]))
                else:
                    existing.detections.append(track_detection)

            pbar.update(1)
            num += 1
    finally:
        # Release the progress bar and the video even if parsing fails midway.
        pbar.close()
        capture.release()

    return ground_truths, tracks
def read_annotations_from_txt(gt_path, analyze=False):
    """
    Read annotations from the txt files

    Each non-blank line is split on commas and read as
    ``frame, track_id, left, top, width, height, confidence, ...``. The frame
    index is shifted by -1, the box fields are truncated to integers and the
    label is always ``'car'``. Every line yields one ``Detection`` in the flat
    list and a second, separate ``Detection`` instance appended to the
    ``Track`` whose ``id`` equals the line's track id (created on first
    sight).

    Arguments:
        gt_path: path to .txt file
        analyze: accepted for backward compatibility and has no effect. It
            used to gather width/height/ratio ranges that were never printed
            or returned, and that code crashed on float-formatted or
            zero-width boxes.

    :returns: tuple (list of Detection, list of Track)
    """
    ground_truths_list = []
    tracks = []

    with open(gt_path) as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline) instead of crashing
            # on int('').
            if not line.strip():
                continue

            data = line.split(',')
            track_id = int(data[1])
            fields = (
                int(data[0]) - 1,
                'car',
                int(float(data[2])),
                int(float(data[3])),
                int(float(data[4])),
                int(float(data[5])),
                float(data[6]),
            )

            ground_truths_list.append(Detection(*fields, track_id=track_id))

            # The track gets its own instance so that changing one of the two
            # returned structures never affects the other.
            track_detection = Detection(*fields, track_id=track_id)
            existing = next((t for t in tracks if t.id == track_id), None)
            if existing is None:
                tracks.append(Track(track_id, [track_detection]))
            else:
                existing.detections.append(track_detection)

    return ground_truths_list, tracks