def forward(ctx, boxes, scores, threshold, top_k=200):
    # Nothing to suppress when there are no detections.
    if scores.size(0) == 0:
        return torch.tensor([], dtype=torch.int), torch.tensor(0)
    if scores.dim() == 1:
        scores = scores.unsqueeze(1)
    if not boxes.is_cuda:
        # CPU path: fall back to torchvision's NMS.
        keep = torch_nms(boxes, scores.flatten(), threshold)
    else:
        # GPU path: the custom extension expects (x1, y1, x2, y2, score) rows.
        keep = EXTENSIONS.nms(torch.cat((boxes, scores), dim=1), threshold, top_k)
    return keep, torch.tensor(keep.size(0))
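# For reference, a minimal pure-PyTorch sketch of the keep-index computation
# that greedy NMS performs (illustrative only; `nms_reference` is not part of
# the original code, and the CUDA extension may differ in tie-breaking and
# top_k handling):
import torch

def nms_reference(boxes, scores, threshold, top_k=200):
    """Greedy NMS on (x1, y1, x2, y2) boxes; returns kept indices."""
    order = scores.argsort(descending=True)[:top_k]
    keep = []
    while order.numel() > 0:
        i = order[0]
        keep.append(i.item())
        if order.numel() == 1:
            break
        rest = order[1:]
        # Intersection of the highest-scoring box with the remaining boxes.
        xy1 = torch.max(boxes[i, :2], boxes[rest, :2])
        xy2 = torch.min(boxes[i, 2:], boxes[rest, 2:])
        inter = (xy2 - xy1).clamp(min=0).prod(dim=1)
        area_i = (boxes[i, 2:] - boxes[i, :2]).prod()
        area_rest = (boxes[rest, 2:] - boxes[rest, :2]).prod(dim=1)
        iou = inter / (area_i + area_rest - inter)
        # Discard boxes overlapping the kept box beyond the threshold.
        order = rest[iou <= threshold]
    return torch.tensor(keep, dtype=torch.long)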
def process_image(self, image_path=None, image=None, show_image=False):
    # Release cached GPU memory from previous runs.
    torch.cuda.empty_cache()
    # At most one of image_path / image may be provided.
    assert image_path is None or image is None
    if image_path:
        if self.contain_zh(image_path):
            # cv2.imread cannot handle non-ASCII (e.g. Chinese) paths,
            # so decode the raw bytes instead.
            image_cv2 = cv2.imdecode(
                np.fromfile(image_path, dtype=np.uint8), -1)
        else:
            image_cv2 = cv2.imread(image_path)
    else:
        image_cv2 = image

    # Pre-processing and inference.
    image = self.preprocess(image_cv2)
    detections = self.model(image).view(-1, 5)

    # Scale each detection back up to the original image size.
    if self.use_cuda:
        scale = torch.Tensor([
            image_cv2.shape[1], image_cv2.shape[0],
            image_cv2.shape[1], image_cv2.shape[0]
        ]).cuda()
    else:
        scale = torch.Tensor([
            image_cv2.shape[1], image_cv2.shape[0],
            image_cv2.shape[1], image_cv2.shape[0]
        ]).cpu()
    scores = detections[..., 0]
    boxes = detections[..., 1:] * scale

    # Keep boxes whose score reaches the threshold and whose bottom
    # coordinate is plausible.
    keep_mask = (scores >= self.thresh) & (boxes[..., -1] > 2.0)
    scores = scores[keep_mask]
    boxes = boxes[keep_mask]

    h, w = image_cv2.shape[0:2]
    return_images = []
    keep_idx = torch_nms(boxes, scores, iou_threshold=0.4)
    if len(keep_idx) > 0:
        keep_boxes = boxes[keep_idx].cpu().numpy()
        keep_scores = scores[keep_idx].cpu().numpy()
        for box, s in zip(keep_boxes, keep_scores):
            # Enlarge the detection box by the configured factor.
            x_length, y_length = abs(box[0] - box[2]), abs(box[1] - box[3])
            add_x = x_length * (base_config.face_enlarge - 1) / 2
            add_y = y_length * (base_config.face_enlarge - 1) / 2
            box_large = box + [-add_x, -add_y, add_x, add_y]
            # Clip the enlarged box to the image boundaries.
            if box_large[0] < 0:
                box_large[0] = 0
            if box_large[1] < 0:
                box_large[1] = 0
            if box_large[2] > w:
                box_large[2] = w
            if box_large[3] > h:
                box_large[3] = h
            box = np.array(box, np.int32)
            box_large = np.array(box_large, np.int32)
            crop_face = image_cv2[box_large[1]:box_large[3],
                                  box_large[0]:box_large[2], :]
            return_images.append([crop_face, round(s, 2)])
    print("{} faces are detected.".format(len(keep_idx)))

    if show_image:
        if len(keep_idx) > 0:
            keep_boxes = boxes[keep_idx].cpu().numpy()
            keep_scores = scores[keep_idx].cpu().numpy()
            for box, s in zip(keep_boxes, keep_scores):
                # OpenCV drawing functions need integer coordinates.
                box = np.array(box, np.int32)
                cv2.rectangle(image_cv2, (box[0], box[1]), (box[2], box[3]),
                              color=(0, 0, 255), thickness=2)
                cv2.rectangle(image_cv2, (box[0], box[1] - 20),
                              (box[0] + 80, box[1] - 2),
                              color=(0, 255, 0), thickness=-1)
                cv2.putText(image_cv2, "{:.2f}".format(s),
                            (box[0], box[1] - 2),
                            cv2.FONT_HERSHEY_SIMPLEX, self.thresh,
                            color=(255, 255, 255), thickness=2)
        cv2.imshow('image', image_cv2)
        cv2.waitKey(0)
    return return_images
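# A hedged usage sketch for process_image; the FaceDetector class name and
# its construction are hypothetical stand-ins, only the method signature
# above comes from the source:
detector = FaceDetector()  # hypothetical wrapper that loads the model
faces = detector.process_image(image_path="group_photo.jpg")
for crop_face, score in faces:
    print(crop_face.shape, score)  # BGR crop (H, W, 3) and its rounded score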
# (Fragment: this snippet begins mid-way through building the scale tensor;
# `args.cuda` below is a hypothetical flag, as the original condition is
# truncated.)
if args.cuda:
    scale = torch.Tensor([
        img_cv2.shape[1], img_cv2.shape[0],
        img_cv2.shape[1], img_cv2.shape[0]
    ]).cuda()
else:
    scale = torch.Tensor([
        img_cv2.shape[1], img_cv2.shape[0],
        img_cv2.shape[1], img_cv2.shape[0]
    ]).cpu()
scores = detections[..., 0]
boxes = detections[..., 1:] * scale

# Keep boxes whose score reaches the threshold and whose bottom coordinate
# is plausible.
keep_mask = (scores >= args.thresh) & (boxes[..., -1] > 2.0)
scores = scores[keep_mask]
boxes = boxes[keep_mask]

keep_idx = torch_nms(boxes, scores, iou_threshold=0.4)
if len(keep_idx) > 0:
    keep_boxes = boxes[keep_idx].cpu().numpy()
    keep_scores = scores[keep_idx].cpu().numpy()
    for box, s in zip(keep_boxes, keep_scores):
        box = np.array(box, np.int32)
        cv2.rectangle(img_cv2, (box[0], box[1]), (box[2], box[3]),
                      color=(0, 0, 255), thickness=2)
        cv2.rectangle(img_cv2, (box[0], box[1] - 20),
                      (box[0] + 80, box[1] - 2),
                      color=(0, 255, 0), thickness=-1)
        cv2.putText(img_cv2, "{:.2f}".format(s), (box[0], box[1] - 2),
                    # remaining arguments reconstructed to match the parallel
                    # snippet above (the call is truncated in the original)
                    cv2.FONT_HERSHEY_SIMPLEX, args.thresh,
                    color=(255, 255, 255), thickness=2)
def get_bounding_boxes(self, x, encoding=None, targets=None):
    from torchvision.ops import nms as torch_nms

    if encoding is None:
        encoding = self.encode_yolo(x)
    outputs = self.yolo_decoder(encoding)
    if targets is not None:
        yoloLossValue = self.yolo_loss(outputs, targets)
    else:
        yoloLossValue = 0

    boxes = []
    for output in outputs:
        # Get detected boxes, labels, confidences, and class scores.
        boxes_normalized_all, class_labels_all, confidences_all, class_scores_all = pred_decode(
            output,
            prob_thresh=self.prob_thresh,
            conf_thresh=self.conf_thresh,
        )
        if boxes_normalized_all.size(0) == 0:
            # No detections: append a placeholder corner tensor.
            # FloatTensor is assumed to be a module-level alias
            # (e.g. torch.FloatTensor or torch.cuda.FloatTensor).
            boxes.append(FloatTensor(outputs.shape[0], 2, 4))
            continue

        # Apply non-maximum suppression to the boxes of each class.
        boxes_normalized, class_labels, probs = [], [], []
        for class_label in range(self.num_classes):
            mask = (class_labels_all == class_label)
            if torch.sum(mask) == 0:
                continue  # no box found for this class, skip it.
            boxes_normalized_masked = boxes_normalized_all[mask]
            class_labels_masked = class_labels_all[mask]
            confidences_masked = confidences_all[mask]
            class_scores_masked = class_scores_all[mask]
            ids = torch_nms(boxes_normalized_masked, confidences_masked,
                            self.nms_thresh)
            boxes_normalized.append(boxes_normalized_masked[ids])
            class_labels.append(class_labels_masked[ids])
            probs.append(confidences_masked[ids] * class_scores_masked[ids])
        boxes_normalized = torch.cat(boxes_normalized, 0)
        class_labels = torch.cat(class_labels, 0)
        probs = torch.cat(probs, 0)

        # Convert normalized (x1, y1, x2, y2) boxes into a 2x4 tensor of
        # corner coordinates (row 0: x, row 1: y) in the [-40, 40] range.
        better_coordinates = FloatTensor(boxes_normalized.shape[0], 2, 4)
        translation = FloatTensor(boxes_normalized.shape[0], 2, 4)
        translation[:, 0, :].fill_(-40)
        translation[:, 1, :].fill_(40)
        center_x = (boxes_normalized[:, 0] + boxes_normalized[:, 2]) / 2 * WIDTH
        center_y = (boxes_normalized[:, 1] + boxes_normalized[:, 3]) / 2 * HEIGHT
        width = (boxes_normalized[:, 2] - boxes_normalized[:, 0]) * WIDTH
        height = (boxes_normalized[:, 3] - boxes_normalized[:, 1]) * HEIGHT
        better_coordinates[:, 0, 0] = center_x - width / 2   # x1
        better_coordinates[:, 0, 1] = center_x + width / 2   # x2
        better_coordinates[:, 0, 2] = center_x - width / 2   # x3
        better_coordinates[:, 0, 3] = center_x + width / 2   # x4
        better_coordinates[:, 1, 0] = center_y - height / 2  # y1
        better_coordinates[:, 1, 1] = center_y + height / 2  # y2
        better_coordinates[:, 1, 2] = center_y + height / 2  # y3
        better_coordinates[:, 1, 3] = center_y - height / 2  # y4
        better_coordinates[:, 1, :].mul_(-1)  # flip the y axis
        better_coordinates += translation     # shift back into [-40, 40]
        # Clamp out-of-range coordinates (the original repeated this clamp
        # twice; once is equivalent).
        better_coordinates.clamp_(min=-40, max=40)

        # Reorder the corners so they run clockwise from the top left.
        better_coordinates = better_coordinates[:, :, [0, 2, 3, 1]]
        boxes.append(better_coordinates)
    return tuple(boxes), yoloLossValue
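# For context, torchvision.ops.nms takes (boxes, scores, iou_threshold) with
# boxes in (x1, y1, x2, y2) format and returns the kept indices sorted by
# decreasing score; a minimal sketch:
import torch
from torchvision.ops import nms as torch_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],    # heavy overlap with box 0
                      [20., 20., 30., 30.]])
scores = torch.tensor([0.9, 0.8, 0.7])
print(torch_nms(boxes, scores, 0.5))  # tensor([0, 2]); box 1 is suppressed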