def speed_test(self, custom_settings=None):
    """
    custom_settings, if set, should be a tuple of
    (nms_threshold, conf_threshold, device), where device is "cuda:0" or "cpu"

    !!! overwrites the original settings
    """
    if custom_settings:
        print("Current custom settings: ", custom_settings)
        nms_thresh, conf_thresh, device = custom_settings
        self.output_handler.suppress_threshold = nms_thresh
        self.output_handler.confidence_threshold = conf_thresh
        self.device = device
        self.model.to(self.device)

    run = 0
    total_time_model, total_time_pre_nms, total_time_nms, in_nms_boxes = 0, 0, 0, 0
    while run < self.runs:
        c_img = 0
        times_model, times_pre_nms, times_nms, in_nms_boxes = 0, 0, 0, 0
        self.valid_loader_iter = iter(self.valid_loader)
        while c_img < self.n_images:
            # forward pass; val_image_output also reports its own runtime
            (boxes, confs), image_info, last_model = self.val_image_output()
            times_model += last_model

            # time everything between the raw outputs and nms
            start_pre_nms = time.time()
            boxes, classes = postprocess_until_nms(
                self.output_handler, boxes, confs, image_info[0][1])
            last_pre_nms = time.time() - start_pre_nms
            times_pre_nms += last_pre_nms

            boxes = wh2corners_numpy(boxes[:, :2], boxes[:, 2:])

            # time nms itself, capped at the top 200 boxes
            start_nms = time.time()
            boxes = boxes[:200]
            in_nms_boxes += len(boxes)
            _ = nms(boxes, classes, self.output_handler.suppress_threshold)
            last_nms = time.time() - start_nms
            times_nms += last_nms

            c_img += 1

        if self.print_each_run:
            self.print_stats(times_model, times_pre_nms, times_nms, self.n_images)
            print("Mean number of boxes processed by nms: ",
                  "{:.2f}".format(in_nms_boxes / self.n_images))

        run += 1
        total_time_model += times_model
        total_time_pre_nms += times_pre_nms
        total_time_nms += times_nms

    print("Final results:")
    print("--------------------------------------")
    print("--------------------------------------\n\n")
    self.print_stats(total_time_model, total_time_pre_nms, total_time_nms,
                     self.n_images * self.runs)
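# The three timed stages above all follow the same start/stop pattern.
# A minimal, self-contained equivalent of that pattern (a sketch, not part
# of this repo) is a context manager that accumulates elapsed wall-clock
# seconds into a dict keyed by stage name:
import time
from contextlib import contextmanager

@contextmanager
def timed(acc, key):
    """Accumulate the wall-clock time of the wrapped block into acc[key]."""
    start = time.time()
    try:
        yield
    finally:
        acc[key] += time.time() - start

# Usage sketch:
#     acc = {"model": 0.0, "pre_nms": 0.0, "nms": 0.0}
#     with timed(acc, "nms"):
#         nms(boxes, classes, threshold)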
def run_inference(self, image, modify_image=True, custom_settings=None):
    """
    If the modify_image flag is True, the boxes are drawn on the given image;
    otherwise this function just returns the predicted boxes.

    custom_settings, if set, should be a tuple of
    (nms_threshold, conf_threshold, device), where device is "cuda:0" or "cpu"

    !!! overwrites the original settings
    """
    if custom_settings:
        print("Current custom settings: ", custom_settings)
        nms_thresh, conf_thresh, device = custom_settings
        self.output_handler.suppress_threshold = nms_thresh
        self.output_handler.confidence_threshold = conf_thresh
        self.device = device
        self.model.to(self.device)

    with torch.no_grad():
        original_image = image.copy()
        height, width, _ = original_image.shape

        # resize and normalize to the network's input format
        image = cv2.resize(image, (300, 300))
        image = F.to_tensor(image)
        image = F.normalize(image, mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225])
        image = image.to(self.device)
        image = image.unsqueeze(dim=0)

        boxes, confs = self.model(image)
        boxes = boxes.squeeze().permute(1, 0)
        confs = confs.squeeze().permute(1, 0)

        boxes, classes = postprocess_until_nms(
            self.output_handler, boxes, confs, (width, height))
        boxes = wh2corners_numpy(boxes[:, :2], boxes[:, 2:])

        kept_indices = nms(boxes, classes, self.output_handler.suppress_threshold)
        boxes = boxes[kept_indices].astype(int)

        # clip values to the image range
        clip_boxes(boxes, width, height)

        if modify_image:
            image = self.plot_boxes(original_image, boxes)
            return image
        return boxes
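# The resize/normalize steps in run_inference are the standard ImageNet
# preprocessing. A self-contained sketch of just that transform (assumes
# torchvision and OpenCV are installed; the names here are illustrative):
import cv2
import torchvision.transforms.functional as F

def preprocess(image_hw3, size=300, device="cpu"):
    """Turn an H x W x 3 uint8 image into a normalized 1 x 3 x size x size tensor."""
    resized = cv2.resize(image_hw3, (size, size))
    tensor = F.to_tensor(resized)
    tensor = F.normalize(tensor, mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
    return tensor.unsqueeze(0).to(device)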
def process_outputs(self, bbox_predictions, classification_predictions, image_info):
    """
    Returns complete model outputs as an array of: bbox, class id, confidence.
    All operations are done on the CPU.
    """
    prediction_bboxes, predicted_classes, highest_confidence_for_predictions, _ = \
        self._get_sorted_predictions(
            bbox_predictions, classification_predictions, image_info)

    # convert to corner format for nms
    prediction_bboxes = wh2corners_numpy(prediction_bboxes[:, :2], prediction_bboxes[:, 2:])
    indices_kept_by_nms = nms(prediction_bboxes, predicted_classes, self.suppress_threshold)

    # new structure: array of bbox, class, confidence
    prediction_bboxes = corners_to_wh(prediction_bboxes)
    complete_outputs = np.concatenate(
        (prediction_bboxes, predicted_classes, highest_confidence_for_predictions), axis=1)

    return complete_outputs[indices_kept_by_nms]
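# Shape sketch for the concatenation above (assumption: N predictions, with
# class ids and confidences stored as N x 1 column vectors; the numbers are
# made up):
import numpy as np

bboxes = np.zeros((3, 4))           # N x 4, xywh
class_ids = np.ones((3, 1))         # N x 1
confidences = np.full((3, 1), 0.9)  # N x 1
rows = np.concatenate((bboxes, class_ids, confidences), axis=1)
assert rows.shape == (3, 6)         # each row: x, y, w, h, class_id, confidence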
def test_anchor_mapping(image, bbox_predictions, classification_predictions, gt_bbox,
                        gt_class, image_info, params, model_outputs, visualize_anchors,
                        visualize_anchor_gt_pair, all_anchor_classes, verbose=False,
                        very_verbose=False):
    """
    Args:
        image - C x H x W normalized tensor
        bbox_predictions - 4 x #anchors tensor
        classification_predictions - #classes x #anchors tensor
        gt_bbox - #objects x 4 tensor
        gt_class - #objects tensor
        image_info - (image_id, (width, height))
        model_outputs - flag controlling whether to check model outputs
        visualize_anchors, visualize_anchor_gt_pair - similar flags
    """
    output_handler = Model_output_handler(params)

    anchors_ltrb = default_boxes(order="ltrb")
    anchors_xywh = default_boxes(order="xywh")

    overlaps = jaccard(wh2corners(gt_bbox[:, :2], gt_bbox[:, 2:]), anchors_ltrb)

    processed_predicted_bboxes, processed_predicted_classes, highest_confidence_for_predictions, _ = \
        output_handler._get_sorted_predictions(
            bbox_predictions, classification_predictions, image_info)

    # map each anchor to the ground-truth object of highest IOU; pos_idx holds
    # the ids of the matched anchors
    gt_bbox_for_matched_anchors, matched_gt_class_ids, pos_idx = map_to_ground_truth(
        overlaps, gt_bbox, gt_class, params)

    indeces_kept_by_nms = nms(
        wh2corners_numpy(processed_predicted_bboxes[:, :2], processed_predicted_bboxes[:, 2:]),
        processed_predicted_classes, output_handler.suppress_threshold)

    # get things in the right format
    image = output_handler._unnorm_scale_image(image)
    pos_idx = pos_idx.cpu().numpy()
    gt_bbox = output_handler._rescale_bboxes(gt_bbox, image_info[1])
    gt_class = gt_class.cpu().numpy()
    all_anchor_classes = map_id_to_idx(all_anchor_classes).cpu().numpy()

    # get model predictions, unsorted and without nms
    raw_bbox_predictions = output_handler._convert_offsets_to_bboxes(
        bbox_predictions, image_info[1])
    raw_class_confidences = output_handler._convert_confidences_to_workable_data(
        classification_predictions)
    raw_class_indeces, _ = output_handler._get_predicted_class(raw_class_confidences)

    # rescale gt bboxes and anchors
    gt_bbox_for_matched_anchors = output_handler._rescale_bboxes(
        gt_bbox_for_matched_anchors, image_info[1])
    matched_gt_class_idxs = map_id_to_idx(matched_gt_class_ids[pos_idx]).cpu().numpy()
    anchors_xywh = output_handler._rescale_bboxes(anchors_xywh, image_info[1])

    if model_outputs:
        test(raw_bbox=raw_bbox_predictions,
             raw_class_confidences=classification_predictions,
             raw_class_indeces=raw_class_indeces,
             gt_bbox=gt_bbox,
             gt_class=gt_class,
             pred_bbox=processed_predicted_bboxes,
             pred_class=processed_predicted_classes,
             highest_confidence_for_predictions=highest_confidence_for_predictions,
             indeces_kept_by_nms=indeces_kept_by_nms,
             pos_idx=pos_idx,
             size=image_info[1],
             image=image,
             anchors=anchors_xywh,
             gt_bbox_for_matched_anchors=gt_bbox_for_matched_anchors,
             matched_gt_class_idxs=matched_gt_class_idxs,
             all_anchor_classes=all_anchor_classes,
             verbose=verbose,
             very_verbose=very_verbose)

    return inspect_anchors(image=image,
                           anchors=anchors_xywh,
                           gt_bbox_for_matched_anchors=gt_bbox_for_matched_anchors,
                           gt_classes_for_matched_anchors=matched_gt_class_idxs,
                           pos_idx=pos_idx,
                           size=image_info[1],
                           visualize_anchors=visualize_anchors,
                           visualize_anchor_gt_pair=visualize_anchor_gt_pair)
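# The anchor matching above starts from pairwise IoU ("jaccard") between
# ground-truth boxes and anchors, both in corner (ltrb) format. A minimal
# NumPy sketch of that computation (an illustration, not the repo's jaccard):
import numpy as np

def iou_matrix(gt, anchors):
    """Pairwise IoU: gt is G x 4, anchors is A x 4, result is G x A."""
    tl = np.maximum(gt[:, None, :2], anchors[None, :, :2])  # intersection top-lefts
    br = np.minimum(gt[:, None, 2:], anchors[None, :, 2:])  # intersection bottom-rights
    wh = np.clip(br - tl, 0, None)                          # clamp empty intersections
    inter = wh[..., 0] * wh[..., 1]
    area_gt = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])
    area_an = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
    return inter / (area_gt[:, None] + area_an[None, :] - inter)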
if args.input[-4:] == '.mp4' or args.input[-4:] == '.avi':
    video = cv2.VideoCapture(args.input)
    video_length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    pbar = tqdm(unit='frame', total=video_length)

    while video.isOpened():
        ret, frame = video.read()
        if not ret:
            video.release()
            break

        # run the detector on a resized copy of the frame
        resized_frame = cv2.resize(frame, (config['IMAGE_SIZE'], config['IMAGE_SIZE']))
        detected_bboxes = sess.run(model.outputs,
                                   feed_dict={model.inputs: np.expand_dims(resized_frame, axis=0)})
        filtered_bboxes = postprocessing.nms(detected_bboxes,
                                             conf_thresh=config['CONF_THRESH'],
                                             iou_thresh=config['IOU_THRESH'])

        # draw only the classes requested on the command line (or all of them)
        for class_id, v in filtered_bboxes.items():
            if args.classes is None or classes[class_id] in args.classes:
                for detection in v:
                    label_bboxes(frame, detection['bbox'], class_id, detection['score'])

        pbar.update(1)
        cv2.imshow('Result', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            video.release()
            break

    print("Done")
else:
    frame = cv2.imread(args.input)
    resized_frame = cv2.resize(frame, (config['IMAGE_SIZE'], config['IMAGE_SIZE']))
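# For reference, the video loop above assumes postprocessing.nms returns a
# dict mapping class_id -> list of detections, each with 'bbox' and 'score'
# keys. A small illustration of walking that structure (the values are made
# up):
example = {0: [{'bbox': (10, 20, 110, 220), 'score': 0.91}],
           2: [{'bbox': (50, 60, 150, 160), 'score': 0.78},
               {'bbox': (55, 65, 152, 158), 'score': 0.40}]}
for class_id, detections in example.items():
    for det in detections:
        print(class_id, det['bbox'], det['score'])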