def evaluate(model, data_loader, device, save_path=None):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    y_true = []
    y_pred = []
    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(image)
        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time
        metric_logger.update(model_time=model_time)

        for target, output in zip(targets, outputs):
            frame = target['image_id'].item()
            y_true.append([Detection(frame, None, label, *box)
                           for box, label in zip(target['boxes'], target['labels'])])
            y_pred.append([Detection(frame, None, label, *box, score)
                           for box, label, score in zip(output['boxes'], output['labels'], output['scores'])])

        evaluator_time = time.time()
        mAP, _, _ = mean_average_precision(y_true, y_pred, sort_method='score')
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(map=mAP, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    torch.set_num_threads(n_threads)

    if save_path:
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        with open(save_path, 'w') as f:
            for frame_dets in y_pred:
                for d in frame_dets:
                    f.write(f'{d.frame}, -1, {d.xtl}, {d.ytl}, {d.width}, {d.height}, {d.score}, -1, -1, -1\n')
def parse_annotations_from_xml(path):
    with open(path) as f:
        tracks = xmltodict.parse(f.read())['annotations']['track']

    annotations = []
    for track in tracks:
        track_id = track['@id']
        label = track['@label']
        boxes = track['box']
        for box in boxes:
            if label == 'car':
                parked = box['attribute']['#text'].lower() == 'true'
            else:
                parked = None
            annotations.append(Detection(frame=int(box['@frame']),
                                         id=int(track_id),
                                         label=label,
                                         xtl=float(box['@xtl']),
                                         ytl=float(box['@ytl']),
                                         xbr=float(box['@xbr']),
                                         ybr=float(box['@ybr']),
                                         parked=parked))

    return annotations
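# Note (not in the original code): xmltodict maps a repeated XML element to a
# list but a single occurrence to a plain dict, so a file with one <track>, or
# a track with one <box>, would make the loops above iterate over dict keys.
# A minimal defensive helper, assuming that xmltodict behaviour:
def ensure_list(node):
    """Wrap a single xmltodict node in a list so it can be iterated uniformly."""
    return node if isinstance(node, list) else [node]

# Hypothetical usage inside parse_annotations_from_xml:
#   tracks = ensure_list(xmltodict.parse(f.read())['annotations']['track'])
#   boxes = ensure_list(track['box'])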
def refine_bbox(last_detections, new_detection, k=0.5):
    # No refinement for the first two frames
    if len(last_detections) < 2:
        return new_detection

    # Predict the coordinates of the new detection from the last two detections
    # (constant-velocity extrapolation)
    pred_detection_xtl = 2 * last_detections[1].xtl - last_detections[0].xtl
    pred_detection_ytl = 2 * last_detections[1].ytl - last_detections[0].ytl
    pred_detection_xbr = 2 * last_detections[1].xbr - last_detections[0].xbr
    pred_detection_ybr = 2 * last_detections[1].ybr - last_detections[0].ybr

    # Blend the detected coordinates with the predicted ones, weighted by k
    refined_xtl = new_detection.xtl * k + pred_detection_xtl * (1 - k)
    refined_ytl = new_detection.ytl * k + pred_detection_ytl * (1 - k)
    refined_xbr = new_detection.xbr * k + pred_detection_xbr * (1 - k)
    refined_ybr = new_detection.ybr * k + pred_detection_ybr * (1 - k)

    # Build the refined detection
    refined_detection = Detection(frame=new_detection.frame,
                                  id=new_detection.id,
                                  label=new_detection.label,
                                  xtl=refined_xtl,
                                  ytl=refined_ytl,
                                  xbr=refined_xbr,
                                  ybr=refined_ybr,
                                  score=new_detection.score)

    return refined_detection
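# Worked example (illustrative numbers, not from the source): refine_bbox
# extrapolates the last two boxes at constant velocity and blends the
# prediction with the new detection.
#   last xtl values: 100.0 (t-2), 110.0 (t-1)  ->  predicted xtl = 2*110 - 100 = 120.0
#   detected xtl: 126.0 with k = 0.5           ->  refined   xtl = 0.5*126 + 0.5*120 = 123.0
# With k = 1.0 the detection is kept unchanged; with k = 0.0 only the
# prediction is used.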
def task2_2(debug=False, det_path='data/AICity_data/train/S03/c010/det/det_mask_rcnn.txt'):
    """
    Object tracking: tracking with a Kalman filter
    """
    reader = AICityChallengeAnnotationReader(path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(path=det_path)
    dets = reader.get_annotations(classes=['car'])

    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')

    tracker = Sort()
    tracks = defaultdict(list)

    y_true = []
    y_pred = []
    acc = MOTAcumulator()
    for frame in dets.keys():
        detections = dets.get(frame, [])

        new_detections = tracker.update(np.array([[*d.bbox, d.score] for d in detections]))
        new_detections = [Detection(frame, int(d[-1]), 'car', *d[:4]) for d in new_detections]

        y_true.append(gt.get(frame, []))
        y_pred.append(new_detections)
        acc.update(y_true[-1], y_pred[-1])

        if debug:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, img = cap.read()
            for d in new_detections:
                tracks[d.id].append(d.bbox)
                np.random.seed(d.id)
                color = tuple(np.random.randint(0, 256, 3).tolist())
                for dd in tracks[d.id]:
                    cv2.circle(img, (int((dd[0] + dd[2]) / 2), int((dd[1] + dd[3]) / 2)), 5, color, -1)
            cv2.imshow('image', cv2.resize(img, (900, 600)))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cap.release()
    cv2.destroyAllWindows()

    ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
    idf1, idp, idr = acc.get_idf1()
    print(f"AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, IDF1: {idf1:.4f}, IDP: {idp:.4f}, IDR: {idr:.4f}")
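# Note (assumption, based on the reference SORT implementation at
# https://github.com/abewley/sort): Sort.update expects an (N, 5) array of
# [x1, y1, x2, y2, score] rows and returns (M, 5) rows of
# [x1, y1, x2, y2, track_id], which is why the code above reads the track id
# from d[-1]. On frames with no detections, that implementation wants an
# empty (0, 5) array so the internal Kalman filters still advance:
#   tracker.update(np.empty((0, 5)))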
def bounding_boxes(mask, min_height, max_height, min_width, max_width, frame):
    contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    detections = []
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        if min_width < w < max_width and min_height < h < max_height:
            detections.append(Detection(frame, None, 'car', x, y, x + w, y + h))
    return detections
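# Usage sketch (an assumption, not from the source): bounding_boxes expects a
# binary foreground mask, for example from background subtraction. The size
# thresholds and video path below are illustrative.
bg_subtractor = cv2.createBackgroundSubtractorMOG2()
cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')
frame_idx = 0
ret, img = cap.read()
while ret:
    fg_mask = bg_subtractor.apply(img)
    # discard shadow pixels (MOG2 marks shadows as 127, foreground as 255)
    _, fg_mask = cv2.threshold(fg_mask, 200, 255, cv2.THRESH_BINARY)
    dets = bounding_boxes(fg_mask, min_height=50, max_height=400,
                          min_width=50, max_width=600, frame=frame_idx)
    frame_idx += 1
    ret, img = cap.read()
cap.release()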
def propagate_track(self, frame_id):
    # Propagate the track as if there was a detection; perhaps the object is
    # temporarily occluded or not detected. Use the predicted bb as the measurement.
    det = Detection.det_from_numpy_array(self.history[-1])  # last entry in the track history
    det.bbox = self.get_predicted_next_bb()
    # TODO: adjust bbox to keep the same width/height and only propagate the x,y centroid position
    # pred_c_x, pred_c_y = util.centroid_from_bb(self.get_predicted_next_bb())
    # wid, ht = util.wid_ht_from_bb(det.bbox)
    # det.bbox = np.array([pred_c_x - wid / 2,
    #                      pred_c_y - ht / 2,
    #                      pred_c_x + wid / 2,
    #                      pred_c_y + ht / 2], dtype=np.int32)
    det.frame_id = frame_id
    self.add_to_track(det)
def parse_annotations_from_txt(path):
    """
    MOTChallenge format [frame, ID, left, top, width, height, conf, -1, -1, -1]
    """
    with open(path) as f:
        lines = f.readlines()

    annotations = []
    for line in lines:
        data = line.split(',')
        annotations.append(Detection(frame=int(data[0]) - 1,
                                     id=int(data[1]),
                                     label='car',
                                     xtl=float(data[2]),
                                     ytl=float(data[3]),
                                     xbr=float(data[2]) + float(data[4]),
                                     ybr=float(data[3]) + float(data[5]),
                                     score=float(data[6])))

    return annotations
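# Worked example (illustrative line, not from the source): MOTChallenge stores
# boxes as [left, top, width, height] with 1-indexed frames, so the line
#   "3,7,100.0,200.0,50.0,60.0,0.9,-1,-1,-1"
# parses to Detection(frame=2, id=7, label='car',
#                     xtl=100.0, ytl=200.0, xbr=150.0, ybr=260.0, score=0.9).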
if __name__ == '__main__':
    data_directory = "../NSL-KDD/"
    train_file = data_directory + "KDDTrain+.csv"
    train_file_20_percent = data_directory + "KDDTrain+_20Percent.csv"
    test_file = data_directory + "KDDTest+.csv"
    normal_file = data_directory + "20/normal.csv"
    data_file = data_directory + "20/data.csv"
    labels_file = data_directory + "20/labels.csv"

    det = Detection(classes=classes,
                    threshold_percentile=threshold_percentile,
                    truth_size=truth_size,
                    truth_update_frac=truth_update_frac,
                    truth_save_folder=truth_save_folder,
                    outlier_save_folder=outlier_save_folder,
                    classifier_save_folder=classifier_save_folder)

    """
    normal_data = load_data(normal_file, categorical_feature_list=categorical_feature_list, categories_list=categories_list)
    X = load_data(data_file, categorical_feature_list=categorical_feature_list, categories_list=categories_list)
    y = load_labels(labels_file, labels=labels, classes=classes)
    y_cls = y['class']
    """

    # train_file already includes the data directory
    df = pd.read_csv(train_file, header=None, names=headers)
    df_X = df[features].copy()
    df_y = df[target].copy()
def task1_1(architecture, start=0, length=None, save_path='results/week3', gpu=0,
            visualize=False, save_detection='detection_results/'):
    """
    Object detection: off-the-shelf
    """
    tensor = transforms.ToTensor()

    if architecture.lower() == 'fasterrcnn':
        model = detection.fasterrcnn_resnet50_fpn(pretrained=True)
    elif architecture.lower() == 'maskrcnn':
        model = detection.maskrcnn_resnet50_fpn(pretrained=True)
    else:
        raise ValueError(architecture)
    save_path = os.path.join(save_path, architecture)

    # Read video and prepare ground truth
    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')
    if not length:
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    reader = AICityChallengeAnnotationReader(path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    gt = {frame: gt[frame] for frame in range(start, start + length)}

    # Start inference
    # CUDA_VISIBLE_DEVICES only takes effect if set before the first CUDA call
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()

    detections = {}
    y_true, y_pred = [], []

    if save_detection:
        path = os.path.join(save_detection, architecture)
        if not os.path.exists(path):
            os.makedirs(path)
        detection_file = open(f'{path}/{architecture.lower()}.txt', 'w')

    with torch.no_grad():
        for frame in range(start, start + length):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, img = cap.read()

            # Transform input to tensor
            print(f'Predict: {frame}')
            start_t = time.time()
            x = [tensor(img).to(device)]
            preds = model(x)[0]
            print(f'Inference time per frame: {round(time.time() - start_t, 2)}')

            # filter car predictions and confidences (label 3 is 'car' in COCO)
            joint_preds = list(zip(preds['labels'], preds['boxes'], preds['scores']))
            car_det = list(filter(lambda x: x[0] == 3, joint_preds))
            # car_det = list(filter(lambda x: x[2] > 0.70, car_det))
            car_det = get_nms(car_det, 0.7)

            # add detections
            detections[frame] = []
            for det in car_det:
                det_obj = Detection(frame=frame,
                                    id=None,
                                    label='car',
                                    xtl=float(det[1][0]),
                                    ytl=float(det[1][1]),
                                    xbr=float(det[1][2]),
                                    ybr=float(det[1][3]),
                                    score=det[2])
                detections[frame].append(det_obj)
                if save_detection:
                    detection_file.write(f"{frame},-1,{det_obj.xtl},{det_obj.ytl},{det_obj.width},{det_obj.height},{det_obj.score},-1,-1,-1\n")

            y_pred.append(detections[frame])
            y_true.append(gt.get(frame, []))

    ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
    print(f'Network: {architecture}, AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')

    if visualize:
        print(f'Saving result to {save_path}')
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        video_iou_plot(gt, detections,
                       video_path='data/AICity_data/train/S03/c010/vdo.avi',
                       title=f'{architecture} detections',
                       save_path=save_path)
    cv2.destroyAllWindows()

    if save_detection:
        detection_file.close()
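# get_nms is referenced above but not defined in this section. A minimal
# sketch (an assumption, not the authors' implementation) matching the
# (label, box, score) tuple layout of joint_preds and using torchvision's
# built-in NMS:
import torch
import torchvision

def get_nms(dets, iou_threshold):
    """Keep only the (label, box, score) tuples that survive NMS."""
    if len(dets) == 0:
        return dets
    boxes = torch.stack([d[1] for d in dets])   # (N, 4) tensor of [x1, y1, x2, y2]
    scores = torch.stack([d[2] for d in dets])  # (N,) tensor of confidences
    keep = torchvision.ops.nms(boxes, scores, iou_threshold)
    return [dets[i] for i in keep.tolist()]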
while True:
    _, img = vc.read()
    if img is None:
        break
    current_frame_position += 1

    img = cv2.resize(img, (int(img.shape[1] * scale_factor), int(img.shape[0] * scale_factor)))

    # detect objects in frame
    bboxes = detector.detect(img)
    detections = []  # our list of Detection objects
    bb = None
    current_time = datetime.datetime.now()
    if bboxes is not None and len(bboxes) > 0:
        for i, bb in enumerate(bboxes):
            det = Detection()
            det.bbox = np.array([bb[0], bb[1], bb[2], bb[3]])
            det.frame_id = current_frame_position
            detections.append(det)

            # DRAW DETECTIONS IN GREEN
            (B, G, R) = (0, 255, 0)
            cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), (B, G, R), 2)

            # log the data; recall headers = ['time', 'id', 'x1', 'y1', 'x2', 'y2']
            bb_id = i
            data = [current_time, bb_id, bb[0], bb[1], bb[2], bb[3]]
            logger.log(data=data)

    tracker.update_tracks(detections=detections, frame_id=current_frame_position)
    tracker.draw_tracks(img)

    cv2.imshow('img', img)
    key = cv2.waitKey(1)
def run():
    # create access to settings object
    settings = Settings()

    # initiate logging
    if settings.ENABLE_LOGGING is True:
        if not os.path.exists(settings.LOGGING_DIRECTORY):
            os.mkdir(settings.LOGGING_DIRECTORY)
        log_filename = os.path.join(settings.LOGGING_DIRECTORY,
                                    settings.OUTPUT_VIDEO_FILENAME + '.csv')
        # [current_frame_position, current_time_string, uid, latest_bbox, lidar_distance]
        headers = ['frame#', 'time', 'uid', 'x1', 'y1', 'x2', 'y2',
                   'lidar_d(ft)', 'grnd_angle_d(ft)']
        data_logger = DataLogger(filename=log_filename, headers=headers)

    vc = cv2.VideoCapture()
    if settings.SIMULATION_MODE is True:
        vc.open(settings.INPUT_VIDEO_FILE)
    else:
        vc.open(settings.WEBCAM_ID)

    if settings.RECORD_VIDEO is True:
        if not os.path.exists(settings.OUTPUT_VIDEO_FOLDER):
            os.mkdir(settings.OUTPUT_VIDEO_FOLDER)
        # get first frame of video to get recording size
        _, frame = vc.read()
        # declare video writer obj
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        vw = cv2.VideoWriter(os.path.join(settings.OUTPUT_VIDEO_FOLDER,
                                          settings.OUTPUT_VIDEO_FILENAME + '.avi'),
                             fourcc, 20.0, (frame.shape[1], frame.shape[0]))

    detector = CarDetector()
    tracker = MultipleObjectTracker()
    if settings.SIMULATION_MODE is True:
        lidar = LidarSensor(mode='simulation', simulation_data_file=settings.SIMULATION_FILE)
    else:
        lidar = LidarSensor()
    distance_predictor = DistancePredictor()

    scale_factor = 0.75  # reduce frame by this size
    tracked_bbox = []  # a list of past bboxes
    current_frame_position = 0
    while True:
        # take a snapshot of the current time
        current_time = datetime.datetime.now()
        current_time_string = current_time.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]

        # first get lidar distance
        lidar_distance = lidar.get_distance(frame_number=current_frame_position)

        # now acquire image and get detections
        _, img = vc.read()
        if img is None:
            print('null image retrieved! frame={}'.format(current_frame_position))
            break
        current_frame_position += 1

        img = cv2.resize(img, (int(img.shape[1] * scale_factor), int(img.shape[0] * scale_factor)))

        # detect objects in frame
        bboxes = detector.detect(img)
        detections = []  # our list of Detection objects
        bb = None
        if bboxes is not None and len(bboxes) > 0:
            for i, bb in enumerate(bboxes):
                det = Detection()
                det.bbox = np.array([bb[0], bb[1], bb[2], bb[3]])
                det.frame_id = current_frame_position
                detections.append(det)

                # DRAW DETECTIONS IN GREEN
                (B, G, R) = (0, 255, 0)
                cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), (B, G, R), 2)

                # log the data; recall headers = ['time', 'id', 'x1', 'y1', 'x2', 'y2']
                bb_id = i
                data = [current_time, bb_id, bb[0], bb[1], bb[2], bb[3]]

        tracker.update_tracks(detections=detections, frame_id=current_frame_position)

        # create a list of the current row for logging
        logging_data = []
        # get the "head" of each track
        current_tracks = tracker.get_track_heads()
        for current_track in current_tracks:
            latest_bbox = current_track.get_latest_bb()
            # get predicted distances from bounding box
            predicted_distances = distance_predictor.predict(latest_bbox)
            uid = current_track.uid
            logging_row = [current_frame_position, current_time_string, uid,
                           latest_bbox[0], latest_bbox[1], latest_bbox[2], latest_bbox[3],
                           lidar_distance, predicted_distances[0]]
            logging_data.append(logging_row)
        if settings.ENABLE_LOGGING is True:
            data_logger.log(logging_data)

        tracker.draw_tracks(img)

        # draw lidar distance
        cv2.putText(img, 'lidar_dist = ' + str(int(lidar_distance)) + 'ft',
                    (0, 25), 1, 1.5, (255, 0, 0), 2)

        cv2.imshow('img', img)
        key = cv2.waitKey(1)
        if key == 27:
            break

        if settings.RECORD_VIDEO is True:
            # rescale image back to original shape
            img = cv2.resize(img, (int(img.shape[1] / scale_factor), int(img.shape[0] / scale_factor)))
            vw.write(img)
def launch_test_kalman_filter(save_path, distance_thresholds, min_track_len,
                              min_width, min_height, sequence, camera, detector):
    save_video = False
    save_summary = False
    fps = 24

    os.makedirs(save_path, exist_ok=True)

    reader = AICityChallengeAnnotationReader(path='data/AICity_data/train/' + sequence + '/' + camera + '/gt/gt.txt')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(path='data/AICity_data/train/' + sequence + '/' + camera + '/det/det_' + detector + '.txt')
    dets = reader.get_annotations(classes=['car'])
    cap = cv2.VideoCapture('data/AICity_data/train/' + sequence + '/' + camera + '/vdo.avi')
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if save_video:
        writer = imageio.get_writer(os.path.join(save_path, 'task1_' + sequence + '_' + camera + '_' + detector + '.gif'), fps=fps)

    tracker = Sort()
    y_true = []
    tracks = []
    max_track = 0
    video_percentage = 1
    start = 0
    end = int(n_frames * video_percentage)

    for frame in trange(start, end, desc='Tracking'):
        detections_on_frame_ = dets.get(frame, [])
        detections_on_frame = []
        for d in detections_on_frame_:
            # discard detections below the minimum width and height
            if min_width < (d.xbr - d.xtl) and min_height < (d.ybr - d.ytl):
                detections_on_frame.append(d)

        detections_on_frame = tracker.update(np.array([[*d.bbox, d.score] for d in detections_on_frame]))
        detections_on_frame = [Detection(frame, int(d[-1]), 'car', *d[:4]) for d in detections_on_frame]

        tracks, frame_tracks, max_track = update_tracks_by_overlap(tracks,
                                                                   detections_on_frame,
                                                                   max_track,
                                                                   refinement=False,
                                                                   optical_flow=None)

        y_true.append(gt.get(frame, []))

    idf1s = []
    for distance_threshold in distance_thresholds:
        accumulator = MOTAcumulator()
        y_pred = []

        moving_tracks = remove_static_tracks(tracks, distance_threshold, min_track_len)
        detections = []
        for track in moving_tracks:
            detections.extend(track.detections)
        detections = group_by_frame(detections)

        for frame in trange(start, end, desc='Accumulating detections'):
            if save_video:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
                ret, img = cap.read()
                for det in y_true[frame]:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ybr)), (0, 255, 0), 6)

            frame_detections = []
            for det in detections.get(frame, []):
                frame_detections.append(det)
                if save_video:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ybr)), track.color, 6)
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)), (int(det.xbr), int(det.ytl) - 15), track.color, -6)
                    cv2.putText(img, str(det.id), (int(det.xtl), int(det.ytl)), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 6)
                    cv2.circle(img, track.detections[-1].center, 5, track.color, -1)
            y_pred.append(frame_detections)

            if save_video:
                writer.append_data(cv2.resize(img, (600, 350)))

            accumulator.update(y_true[frame], y_pred[-1])

        ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'], sort_method=None)
        print(f'AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')
        print('Additional metrics:')
        summary = accumulator.get_idf1()
        # As mentioned in https://github.com/cheind/py-motmetrics:
        # FAR = FalsePos / Frames * 100
        # MOTP = (1 - MOTP) * 100
        print(summary)
        if save_summary:
            with open(os.path.join(save_path, 'task1_' + sequence + '_' + camera + '_' + detector + '_' + str(distance_threshold) + '.txt'), 'w') as f:
                f.write(str(summary))

        idf1s.append(summary['idf1']['acc'] * 100)

    cv2.destroyAllWindows()
    if save_video:
        writer.close()

    return idf1s