def analyze_sequence(method, color_conversion, sigma_thr=3, rho=0.01):
    """
    Analyze the video sequence with a given method and color conversion.

    Every frame counted by the ground truth is read from disk, optionally
    colour-converted, run through the background subtractor and
    ``process_image``, and turned into detections with ``get_detections``.
    Each processed frame is displayed, and once all frames are done the
    accumulated detections are handed to ``compute_iou``.

    Args:
        method: background subtraction method, forwarded to
            ``background_substractor``.
        color_conversion: conversion code passed to ``cv2.cvtColor``, or
            ``None`` to use the frame as read.
        sigma_thr: forwarded to ``background_substractor``.
        rho: forwarded to ``background_substractor``.
    """
    # Read GT from dataset
    gtExtractor = annotation_parser.annotationsParser(
        AICITY_DIR.joinpath('m6-full_annotation.xml'))

    bckg_subs = background_substractor(method, sigma_thr=sigma_thr, rho=rho)

    detections = []
    frames = gtExtractor.getGTNFrames()

    for i in range(frames):
        # Frame files are numbered from 1 while ``i`` starts at 0.
        frame_path = AICITY_DIR.joinpath('frames',
                                         'image-{:04d}.png'.format(i + 1))
        image = cv2.imread(str(frame_path))

        if color_conversion is not None:
            image = cv2.cvtColor(image, color_conversion)

        image = bckg_subs.apply(image)
        image = process_image(image)

        frame_detections = get_detections(image)
        # One row per detection: the frame index, a constant 0, the first
        # four detection values and a constant 1.
        for detection in frame_detections:
            detections.append([
                i, 0, detection[0], detection[1], detection[2], detection[3],
                1
            ])

        # Convert to 3 channels before the frame goes to add_detections_gt.
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        image = add_detections_gt(image, frame_detections, gtExtractor, i)

        # show the output image
        cv2.imshow("Image", image)
        cv2.waitKey(1)

    compute_iou(gtExtractor, detections, 0.5, method, color_conversion,
                frames)
def __init__(self, id, initial_roi: ROI, frame_id):
    """
    Set up an empty track for object ``id``.

    The frame image numbered ``frame_id`` is read from the dataset and,
    together with ``initial_roi``, used to create the optical-flow tracker.
    A random 3-component colour (each component in 0..255) is also picked.
    """
    self.objectId = id
    self.track = {}
    self.track_corrected = {}

    first_frame_file = AICITY_DIR.joinpath(
        'frames', 'image-{:04d}.png'.format(frame_id))
    self.OF = OpticalFlowTracker(cv2.imread(str(first_frame_file)),
                                 initial_roi)

    # Three independent draws, one per colour component.
    self.color = tuple(int(random.random() * 256) for _ in range(3))

    # Tells add_frame_roi that no ROI has been stored yet.
    self.first_frame = True
def process_image(image):
    """
    Apply the following operations to a background-subtracted image to
    improve moving object detection:
        - Binary thresholding at 130 (to remove shadows in some models)
        - Morphological opening with a 4x4 elliptical structuring element
          to reduce noise
        - Morphological closing with a 10x10 elliptical structuring element
          to fill objects
        - Bitwise AND with the roi.jpg mask, read as a grayscale image

    Returns:
        Processed image
    """
    _, image = cv2.threshold(image, 130, 255, cv2.THRESH_BINARY)

    opening_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 4))
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, opening_kernel)

    closing_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, closing_kernel)

    roi_path = AICITY_DIR.joinpath('roi.jpg')
    roi = cv2.imread(str(roi_path), cv2.IMREAD_GRAYSCALE)

    return cv2.bitwise_and(image, roi)
def add_frame_roi(self, frame_id, roi: ROI):
    """
    Store the ROI observed at ``frame_id`` in this track.

    ``roi`` is tagged with this track's object id and a copy of it is
    saved in ``self.track``. For the first ROI the same copy is also saved
    in ``self.track_corrected``. For later ROIs the frame image is read,
    the optical-flow tracker predicts a centre and is then corrected with
    ``roi``, and ``roi`` repositioned at the predicted centre is saved in
    ``self.track_corrected``.
    """
    roi.objectId = self.objectId
    observed = ROI(roi.xTopLeft, roi.yTopLeft, roi.xBottomRight,
                   roi.yBottomRight, self.objectId)
    self.track[frame_id] = observed

    if self.first_frame:
        # No prediction is made for the first ROI: both tracks share it.
        self.track_corrected[frame_id] = observed
        self.first_frame = False
        return

    frame_file = AICITY_DIR.joinpath('frames',
                                     'image-{:04d}.png'.format(frame_id))
    frame = cv2.imread(str(frame_file))

    # Predict before correcting, so the prediction does not use this ROI.
    predicted_center = self.OF.predict(frame, self.objectId, frame_id, False)
    self.OF.correct(frame, roi)

    moved = roi.reposition(predicted_center)
    self.track_corrected[frame_id] = ROI(moved.xTopLeft, moved.yTopLeft,
                                         moved.xBottomRight,
                                         moved.yBottomRight, self.objectId)
def test_filter_bboxes_out_or_roi():
    """
    Shows, for every sample of the dataset, the image returned by
    ``filter_bbox_out_of_roi`` for that sample's bounding box.

    The window waits for a key press after each sample.
    """
    dataset = AICityDataset(AICITY_DIR, AICITY_ANNOTATIONS)
    roi_file = AICITY_DIR.joinpath('roi.jpg')
    roi_image = cv2.imread(str(roi_file))

    for sample_image, label in dataset:
        # Convert to CV2 image
        frame = cv2.cvtColor(np.array(sample_image), cv2.COLOR_RGB2BGR)

        # Label entries 2 to 5 are the values passed on as the bounding box.
        bbox = tuple(label[k] for k in range(2, 6))

        output_image = filter_bbox_out_of_roi(frame,
                                              bbox,
                                              roi_image,
                                              threshold=0.5)

        # show the output image
        cv2.imshow("Image", output_image)
        cv2.waitKey(0)
def test_iou_with_noise():
    """
    Visual check of the IoU computation against randomized detections.

    For every frame the ground-truth boxes are collected and passed to
    ``randomizer.randomize_detections`` to obtain the detections. Each
    ground-truth box is matched to the detection with the highest IoU;
    both boxes and the IoU value are drawn on the frame, which is shown
    until a key is pressed.
    """
    # Read GT from dataset
    gtExtractor = detection_gt_extractor.detectionExtractorGT(
        AICITY_DIR.joinpath('gt', 'gt.txt'))

    # parameters to randomize detections
    randomNoiseScale = 100
    additionDeletionProbability = 0.0

    # Tallies over the whole sequence; currently counted but not reported.
    TP = 0
    FN = 0
    FP = 0
    threshold = 0.5

    # NOTE(review): the loop bound was truncated in the source
    # (``range(len(``). It is reconstructed here as the extractor's ``gt``
    # container -- confirm the attribute name against
    # detection_gt_extractor.detectionExtractorGT.
    n_annotations = len(gtExtractor.gt)

    for i in range(gtExtractor.getGTNFrames()):

        # Get GT BBOX
        gt = [
            gtExtractor.getGTBoundingBox(j) for j in range(n_annotations)
            if gtExtractor.getGTFrame(j) == i
        ]

        # Get detection BBOX (a copy of gt is passed in)
        detections = randomizer.randomize_detections(
            additionDeletionProbability, randomNoiseScale, gt[:])

        BBoxesDetected = []

        for gtBBOX in gt:
            detection = None
            maxIoU = 0
            BBoxDetected = -1

            # ``>=`` keeps the last detection among equal IoU values.
            for y, candidate in enumerate(detections):
                iou = intersection_over_union.iou_from_bb(gtBBOX, candidate)
                if iou >= maxIoU:
                    maxIoU = iou
                    detection = candidate
                    BBoxDetected = y

            if maxIoU > threshold:
                TP = TP + 1
                BBoxesDetected.append(BBoxDetected)
            else:
                FN = FN + 1

            # load the image
            frame_path = AICITY_DIR.joinpath('frames',
                                             'image-{:04d}.png'.format(i + 1))
            image = cv2.imread(str(frame_path))

            # draw the ground-truth bounding box along with the predicted
            # bounding box; there is no predicted box when the frame has
            # no detections at all
            if detection is not None:
                cv2.rectangle(image, (int(detection[0]), int(detection[1])),
                              (int(detection[2]), int(detection[3])),
                              (0, 0, 255), 2)
            cv2.rectangle(image, (int(gtBBOX[0]), int(gtBBOX[1])),
                          (int(gtBBOX[2]), int(gtBBOX[3])), (0, 255, 0), 2)

            # display the intersection over union
            cv2.putText(image, "IoU: {:.4f}".format(maxIoU), (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            print("{}: {:.4f}".format(frame_path, maxIoU))

            # show the output image
            cv2.imshow("Image", image)
            cv2.waitKey(0)

        # Detections never matched to a ground-truth box are false positives.
        for y in range(len(detections)):
            if y not in BBoxesDetected:
                FP = FP + 1
def compute_mAP(verbose: bool = False, plot: bool = False):
    """
    Compute mAP given some detections and a ground truth.

    It only gets the detections where we have ground truth information
    (regardless there is a bounding box on it or not)

    Note:
        Algorithm implemented::

            * For every frame
                * For each confidence level
                    * Compute precision-recall (consider TP when IoU >= 0.5)
                * Interpolate precision using 11 ranks r={0.0, 0.1, ... 1.0}
                * AP = Average for all interpolated precisions
            * mAP = Mean of AP for all frames

    Args:
        verbose: print the precision-recall table and AP of every frame.
        plot: draw the precision-recall curve and its interpolation for
            every frame.

    Returns:
        The mean of the per-frame AP values.
    """
    # Parameters
    iou_threshold = 0.5
    recalls = np.linspace(start=0, stop=1.0, num=11).round(decimals=1)

    # Selects detections from available model predictions
    # (alternatives: 'det_ssd512.txt', 'det_yolo3.txt')
    detections_path = AICITY_DIR.joinpath('det', 'det_mask_rcnn.txt')
    detections = detections_loader.load_bounding_boxes(detections_path, False)

    dataset = AICityDataset(AICITY_DIR, AICITY_ANNOTATIONS)
    ground_truth = dataset.get_labels()

    # Select detections only for frames we have ground truth.
    # np.isin yields a plain boolean mask, avoiding the ``np.bool`` alias
    # that recent NumPy versions no longer provide.
    frame_numbers = np.unique(ground_truth[:, 0])
    detections = detections[np.isin(detections[:, 0], frame_numbers)]

    # Computes AP for every frame
    ap_per_frame = np.empty((0, 2))
    for frame_number in frame_numbers:
        det = detections[detections[:, 0] == frame_number]
        gt = ground_truth[ground_truth[:, 0] == frame_number]
        gt_bboxes = gt[:, 2:6]

        # For each confidence value
        # Gets precision and recall for a frame considering IoU >= 0.5 TP
        confidences = np.unique(det[:, 5])  # np.unique returns sorted values
        prs = np.empty((0, 2))
        for confidence in confidences:
            # Gets detection/s with higher confidence score than a threshold
            det_bboxes = det[det[:, 5] >= confidence, 1:5]

            pr = mean_ap.get_precision_recall(det=det_bboxes,
                                              gt=gt_bboxes,
                                              threshold=iou_threshold)
            prs = np.vstack((prs, pr))

        # Interpolate p(r) for given r
        precisions_at_specific_recall = mean_ap.interpolate_precision(
            prs, recalls)
        AP = precisions_at_specific_recall.mean()

        if verbose:
            print(f'Frame {frame_number} precision-recall {prs} AP: {AP}')

        if plot:
            # Plot AP and its interpolation
            plt.subplot(211)
            plt.plot(prs[:, 1], prs[:, 0])
            plt.xlabel('Recall')
            plt.ylabel('Precision')
            plt.title('Mean precision as a function of mean recall')
            plt.axis((0, 1.1, 0, 1.1))

            plt.subplot(212)
            plt.plot(recalls, precisions_at_specific_recall)
            plt.xlabel('Recall')
            plt.ylabel('Precision')
            plt.title('Interpolated')
            plt.axis((0, 1.1, 0, 1.1))

        # [frame_num, AP] column-table
        ap_per_frame = np.vstack((ap_per_frame, (frame_number, AP)))

    mAP = ap_per_frame[:, 1].mean()
    print(f'Detections: {detections_path.stem} --> mAP: {mAP}')

    return mAP
Detections are formatted as ``` [frame, left, upper, right, lower, score] ``` where frame starts at 0.0 and all elements are float types """ if contains_width_and_height: return detections_to_bounding_boxes_width_height(load_detections(path)) else: return detections_to_bounding_boxes(load_detections(path)) if __name__ == '__main__': filepath = AICITY_DIR.joinpath('det', 'det_mask_rcnn.txt') detections = load_detections(filepath) print(detections.shape) bb = detections_to_bounding_boxes(detections) print(bb.shape) # print(detections[0,:]) # print(bb[0,:]) filepath = AICITY_DIR.joinpath('det', 'det_ssd512.txt') detections = load_detections(filepath) print(detections.shape) filepath = AICITY_DIR.joinpath('det', 'det_yolo3.txt') detections = load_detections(filepath) print(detections.shape)
def iou_vs_time():
    """
    Plot the mean IoU between detections and ground truth for every frame.

    For each frame, every ground-truth box is matched to the detection
    with the highest IoU. The boxes and IoU values are drawn on the frame,
    which is displayed and appended to ``video.avi``. The per-frame mean
    IoU (0 for frames without ground truth) is plotted at the end.
    """
    # Read GT from dataset
    gtExtractor = annotation_parser.annotationsParser(
        AICITY_DIR.joinpath('AICITY_team4.xml'))

    # Other available detection files:
    # 'det_mask_rcnn.txt' and 'det_ssd512.txt'
    detector = detection_gt_extractor.detectionExtractorGT(
        AICITY_DIR.joinpath('det', 'det_yolo3.txt'))

    # Tallies over the whole sequence; currently counted but not reported.
    TP = 0
    FN = 0
    FP = 0
    threshold = 0.5

    IoUvsFrames = []

    # NOTE(review): both loop bounds were truncated in the source
    # (``range(len(``). They are reconstructed here as each extractor's
    # ``gt`` container -- confirm the attribute name against the
    # annotationsParser and detectionExtractorGT classes.
    n_annotations = len(gtExtractor.gt)
    n_detections = len(detector.gt)

    video = cv2.VideoWriter('video.avi',
                            cv2.VideoWriter_fourcc('M', 'P', '4', '2'), 10,
                            (1920, 1080))
    try:
        for i in range(gtExtractor.getGTNFrames()):

            # load the image
            frame_path = AICITY_DIR.joinpath('frames',
                                             'image-{:04d}.png'.format(i + 1))
            image = cv2.imread(str(frame_path))

            IoUvsFrame = []

            # Get GT BBOX
            gt = [
                gtExtractor.getGTBoundingBox(j) for j in range(n_annotations)
                if gtExtractor.getGTFrame(j) == i
            ]

            # Get detection BBOX
            detections = [
                detector.getGTBoundingBox(j) for j in range(n_detections)
                if detector.getGTFrame(j) == i
            ]

            BBoxesDetected = []

            for x, gtBBOX in enumerate(gt):
                detection = None
                maxIoU = 0
                BBoxDetected = -1

                # ``>=`` keeps the last detection among equal IoU values.
                for y, candidate in enumerate(detections):
                    iou = intersection_over_union.iou_from_bb(
                        gtBBOX, candidate)
                    if iou >= maxIoU:
                        maxIoU = iou
                        detection = candidate
                        BBoxDetected = y

                if maxIoU > threshold:
                    TP = TP + 1
                    BBoxesDetected.append(BBoxDetected)
                else:
                    FN = FN + 1

                # draw the ground-truth bounding box along with the
                # predicted bounding box (if the frame has any detection)
                if detection is not None:
                    cv2.rectangle(image,
                                  (int(detection[0]), int(detection[1])),
                                  (int(detection[2]), int(detection[3])),
                                  (0, 0, 255), 2)
                cv2.rectangle(image, (int(gtBBOX[0]), int(gtBBOX[1])),
                              (int(gtBBOX[2]), int(gtBBOX[3])), (0, 255, 0),
                              2)

                # record the intersection over union and display it, one
                # text row per ground-truth box
                IoUvsFrame.append(maxIoU)
                cv2.putText(image, "IoU: {:.4f}".format(maxIoU),
                            (10, 30 + 20 * x), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                            (0, 255, 0), 2)
                print("{}: {:.4f}".format(frame_path, maxIoU))

            # Frames without ground truth contribute a mean IoU of 0.
            if not IoUvsFrame:
                IoUvsFrame = [0]
            IoUvsFrames.append(sum(IoUvsFrame) / len(IoUvsFrame))

            # Unmatched detections are false positives; draw them too.
            for y, detection in enumerate(detections):
                if y not in BBoxesDetected:
                    FP = FP + 1
                    cv2.rectangle(image,
                                  (int(detection[0]), int(detection[1])),
                                  (int(detection[2]), int(detection[3])),
                                  (0, 0, 255), 2)

            # show the output image
            cv2.imshow("Image", image)
            cv2.waitKey(1)
            video.write(image)
    finally:
        # Release the writer even if a frame fails.
        video.release()

    plt.plot(IoUvsFrames)