def __ro_net_detect(self, image, net_boxes, face_size):
    """Refine candidate boxes with the R-Net (24) or O-Net (48) stage.

    :param image: image object exposing ``crop((x1, y1, x2, y2))`` and
        ``resize((w, h))`` (presumably a PIL image -- confirm with caller).
    :param net_boxes: candidate boxes from the previous stage; columns 0-3
        are read as x1, y1, x2, y2.
    :param face_size: network input side length. 24 selects ``self.r_net``,
        48 selects ``self.o_net``.
    :return: whatever ``tools.nms`` returns for rows
        ``[x1, y1, x2, y2, cls]`` built from the boxes scoring above 0.3.
    :raises ValueError: if ``face_size`` is neither 24 nor 48.
    """
    # Validate up-front so a bad face_size fails before any cropping work.
    # The boolean is the third positional argument of tools.nms
    # (NOTE(review): looks like an "is_min" IoU switch -- confirm in tools).
    if face_size == 24:
        net, nms_flag = self.r_net, False
    elif face_size == 48:
        net, nms_flag = self.o_net, True
    else:
        raise ValueError("face_size not in [24, 48]!")

    # Convert every candidate box to a square.
    _net_boxes = tools.convert_to_square(net_boxes)
    if len(_net_boxes) == 0:
        # torch.stack() rejects an empty list; hand NMS the same empty
        # (0, 5) array that the "nothing passed the threshold" path yields.
        return tools.nms(np.empty((0, 5)), 0.3, nms_flag)

    crops = []
    for _box in _net_boxes:
        left, top, right, bottom = (int(v) for v in _box[:4])
        patch = image.crop((left, top, right, bottom))
        patch = patch.resize((face_size, face_size))
        crops.append(self.__image_transform(patch))
    # Stack the per-box tensors into a single batch tensor.
    img_dataset = torch.stack(crops).to(DEVICE)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out_cls, out_offset = net(img_dataset)
    out_cls = out_cls.cpu().detach().numpy()
    out_offset = out_offset.cpu().detach().numpy()

    # Keep only the rows whose confidence exceeds the threshold.
    idxs, _ = np.where(out_cls > 0.3)
    kept = _net_boxes[idxs]
    offsets = out_offset[idxs]

    _x1, _y1, _x2, _y2 = kept[:, 0], kept[:, 1], kept[:, 2], kept[:, 3]
    ow, oh = _x2 - _x1, _y2 - _y1

    # Offsets are expressed as fractions of the square box's width/height.
    boxes = np.stack([
        _x1 + ow * offsets[:, 0],
        _y1 + oh * offsets[:, 1],
        _x2 + ow * offsets[:, 2],
        _y2 + oh * offsets[:, 3],
        out_cls[idxs][:, 0],
    ], axis=1)
    return tools.nms(boxes, 0.3, nms_flag)
def get_bbox(self, image, multi_test=False, flip_test=False):
    """
    Predict bounding boxes for one image and run NMS on them.

    :param image: image to predict on; it is indexed as ``image[:, ::-1, :]``
        and ``image.shape[1]`` is used as its width when flipping.
    :param multi_test: when true, predict at every third entry of
        ``self._train_input_sizes`` and merge the results; otherwise predict
        once at ``self._test_input_size``.
    :param flip_test: only consulted when ``multi_test`` is true; also
        predicts on the horizontally flipped image and mirrors the x
        coordinates back.
    :return: bboxes after NMS, stored as
        (xmin, ymin, xmax, ymax, score, class)
    """
    if multi_test:
        valid_scale = (0, np.inf)  # loop-invariant, so built once
        bboxes_list = []
        for test_input_size in self._train_input_sizes[::3]:
            bboxes_list.append(
                self.__predict(image, test_input_size, valid_scale))
            if flip_test:
                bboxes_flip = self.__predict(image[:, ::-1, :],
                                             test_input_size, valid_scale)
                # Mirror xmin/xmax back into the un-flipped frame; the
                # columns swap so that xmin <= xmax still holds.
                bboxes_flip[:, [0, 2]] = \
                    image.shape[1] - bboxes_flip[:, [2, 0]]
                bboxes_list.append(bboxes_flip)
        # np.row_stack is a deprecated alias of np.vstack (NumPy 2.0).
        bboxes = np.vstack(bboxes_list)
    else:
        bboxes = self.__predict(image, self._test_input_size, (0, np.inf))

    bboxes = tools.nms(bboxes, self._score_threshold, self._iou_threshold,
                       method='nms')
    return bboxes
def __p_net_detect(self, image):
    """Run the P-Net over an image pyramid and return NMS-filtered boxes.

    :param image: image object exposing ``size`` as ``(w, h)`` and
        ``resize((w, h))`` (presumably a PIL image -- confirm with caller).
    :return: whatever ``tools.nms`` returns for the boxes collected over all
        pyramid levels.
    """
    bboxes = []
    scale = 1
    w, h = image.size
    min_side_len = min(w, h)

    while min_side_len >= 12:
        img_data = self.__image_transform(image)
        # Inference only: do not build an autograd graph per pyramid level.
        with torch.no_grad():
            out_cls, out_offset = self.p_net(
                img_data.to(DEVICE).unsqueeze(0))
        # Drop the batch dimension (and the channel dimension of the
        # confidence map), move to CPU and detach from the graph.
        out_cls = out_cls[0][0].cpu().detach()
        out_offset = out_offset[0].cpu().detach()

        # Positions of the confidence map whose value exceeds 0.6.
        idxs = torch.nonzero(torch.gt(out_cls, 0.6))
        boxes = self.__box(idxs, out_offset, out_cls, scale)
        bboxes.extend(boxes)

        # Image pyramid: shrink by 0.709 relative to the ORIGINAL size.
        scale *= 0.709
        _w = int(w * scale)
        _h = int(h * scale)
        image = image.resize((_w, _h))
        # Loop exit condition: shortest side drops below 12.
        min_side_len = min(_w, _h)

    return tools.nms(np.array(bboxes), 0.3, False)
def test_nms(self):
    """Check that nms() keeps the best box for a batch of three candidates.

    Builds per-class scores as objectness * class probability, tiles the
    boxes and class ids to match, runs ``nms`` and compares the first
    detection of entry 1 against the expected row.
    """
    raw_boxes = torch.Tensor([[[0, 0, 4.1, 6.1],
                               [0, 0, 4, 6],
                               [7, 9, 20, 25]]])
    objectness = torch.Tensor([[[0.9], [1.0], [0.1]]])
    class_probs = torch.Tensor([[[0.9, 0.1, 0.1],
                                 [0.9, 0.2, 0.3],
                                 [0.1, 0.1, 0.2]]])

    count = raw_boxes.shape[1]
    width = class_probs.shape[1]
    combined = objectness * class_probs

    class_ids = torch.arange(count).view(raw_boxes.shape[0], 1, width)
    class_ids = class_ids.repeat(1, count, 1)
    tiled_boxes = raw_boxes.repeat(1, 1, width)
    tiled_boxes = tiled_boxes.view(count, -1, raw_boxes.shape[-1])

    pruned = nms(combined.view(count, -1),
                 tiled_boxes,
                 class_ids.view(count, -1),
                 iou_threshold=0.5,
                 max_detections=6)

    # Positions of the score / box / class entries in the nms() result.
    score_pos, box_pos, class_pos = 1, 2, 3
    top_score = pruned[score_pos][1][0].unsqueeze(dim=-1)
    top_box = pruned[box_pos][1][0]
    top_class = pruned[class_pos][1][0].unsqueeze(dim=-1)

    actual = torch.cat((top_box, top_score, top_class), dim=-1).float()
    expected = torch.Tensor([0, 0, 4, 6, 0.9, 0]).float()
    assert torch.equal(actual, expected)
def get_bbox(self, img, multi_test=False, flip_test=False):
    """Predict bounding boxes for one image and run NMS on them.

    :param img: image to predict on; it is indexed as ``img[:, ::-1]`` and
        ``img.shape[1]`` is used as its width when flipping.
    :param multi_test: when true, predict at the input sizes
        ``range(320, 640, 96)`` and merge the results; otherwise predict
        once at ``self.val_shape``.
    :param flip_test: only consulted when ``multi_test`` is true; also
        predicts on the horizontally flipped image and mirrors the x
        coordinates back.
    :return: the result of ``nms``; the boxes passed in are stored as
        (xmin, ymin, xmax, ymax, score, class).
    """
    if multi_test:
        valid_scale = (0, np.inf)  # loop-invariant, so built once
        bboxes_list = []
        for test_input_size in range(320, 640, 96):
            bboxes_list.append(
                self.__predict(img, test_input_size, valid_scale))
            if flip_test:
                bboxes_flip = self.__predict(img[:, ::-1], test_input_size,
                                             valid_scale)
                # Mirror xmin/xmax back into the un-flipped frame; the
                # columns swap so that xmin <= xmax still holds.
                bboxes_flip[:, [0, 2]] = img.shape[1] - bboxes_flip[:, [2, 0]]
                bboxes_list.append(bboxes_flip)
        # np.row_stack is a deprecated alias of np.vstack (NumPy 2.0).
        bboxes = np.vstack(bboxes_list)
    else:
        bboxes = self.__predict(img, self.val_shape, (0, np.inf))

    # boxes: (xmin, ymin, xmax, ymax, score, class)
    # In case we want to visualize only the selected classes:
    # selected_classes = ['car', 'dog']
    # selected_idxs = [self.cfg.DATA['CLASSES'].index(clss) for clss in selected_classes]
    # mask = np.in1d(bboxes[:, 5], selected_idxs)
    # bboxes = bboxes[mask]

    # NOTE(review): why is self.conf_thresh still passed here? The original
    # author believed self.__predict already filters by it -- confirm.
    bboxes = nms(bboxes, self.conf_thresh, self.nms_thresh)
    return bboxes
def postprocess(pred_bbox, test_input_size, org_img_shape, conf_thres=0.1,
                iou_thres=0.45, valid_scale=(0, float("inf"))):
    """Map raw predictions back to the original image and run NMS.

    :param pred_bbox: 2-D array-like; columns 0-3 are box corners
        (xmin, ymin, xmax, ymax) in network-input coordinates, column 4 is
        the box confidence and columns 5+ are per-class probabilities.
    :param test_input_size: side length of the square network input.
    :param org_img_shape: shape of the original image; only the first two
        entries ``(height, width)`` are used, so ``image.shape`` of a colour
        image is accepted as well.
    :param conf_thres: minimum ``confidence * best class probability`` for a
        box to be kept; also forwarded to ``tools.nms``.
    :param iou_thres: IoU threshold forwarded to ``tools.nms``.
    :param valid_scale: exclusive ``(low, high)`` bounds on sqrt(box area).
    :return: result of ``tools.nms`` over rows
        ``[xmin, ymin, xmax, ymax, score, class]``.
    """
    # np.array copies, so the in-place rescaling below (pred_coor is a view)
    # never touches the caller's data.
    pred_bbox = np.array(pred_bbox)
    pred_coor = pred_bbox[:, 0:4]
    pred_conf = pred_bbox[:, 4]
    pred_prob = pred_bbox[:, 5:]

    org_h, org_w = org_img_shape[:2]

    # Undo the aspect-preserving resize and the symmetric padding (dw, dh).
    resize_ratio = min(1.0 * test_input_size / org_w,
                       1.0 * test_input_size / org_h)
    dw = (test_input_size - resize_ratio * org_w) / 2
    dh = (test_input_size - resize_ratio * org_h) / 2
    pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
    pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio

    # Clip boxes to the image bounds.
    pred_coor = np.concatenate(
        [np.maximum(pred_coor[:, :2], [0, 0]),
         np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])],
        axis=-1)

    # Boxes whose corners are inverted are zeroed; their zero area then
    # fails the strict lower bound of the scale filter below.
    invalid_mask = np.logical_or(pred_coor[:, 0] > pred_coor[:, 2],
                                 pred_coor[:, 1] > pred_coor[:, 3])
    pred_coor[invalid_mask] = 0

    bboxes_scale = np.sqrt(
        np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
    scale_mask = np.logical_and(valid_scale[0] < bboxes_scale,
                                bboxes_scale < valid_scale[1])

    # Score of a box = box confidence * probability of its best class.
    classes = np.argmax(pred_prob, axis=-1)
    scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
    score_mask = scores > conf_thres

    mask = np.logical_and(scale_mask, score_mask)
    coors = pred_coor[mask]
    scores = scores[mask]
    classes = classes[mask]

    bboxes = np.concatenate(
        [coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)
    bboxes = tools.nms(bboxes, conf_thres, iou_thres, method='nms')
    return bboxes
def test_nms(self):
    """Check nms() on a single image with three candidate predictions.

    Each prediction row is box (4 values) + objectness (1) + class
    probabilities (3); the pruned result is converted with
    ``xyxy_to_xywh`` before comparison.
    """
    box_rows = torch.Tensor([[0, 0, 4.1, 6.1],
                             [0, 0, 4, 6],
                             [7, 9, 20, 25]])
    objectness = torch.Tensor([[0.9], [1.0], [0.1]])
    class_probs = torch.Tensor([[0.9, 0.1, 0.1],
                                [0.9, 0.2, 0.3],
                                [0.1, 0.1, 0.2]])

    # Concatenate the columns, then add a leading batch dimension.
    rows = torch.cat((box_rows, objectness, class_probs), dim=1)
    predictions = rows.unsqueeze(0)

    kept = nms(predictions, iou_threshold=0.5, score_threshold=0.5)
    converted = xyxy_to_xywh(kept[0])

    expected = torch.Tensor([[0, 0, 4, 6, 0.9, 0]]).float()
    assert torch.equal(converted.float(), expected)
def __call__(self, deltas, scores, anchors, img_size, scale):
    """
    Turn anchors plus regression deltas into score-ranked, NMS-filtered
    proposals.

    :param deltas: 2d array, shape(n, 4)
    :param scores: 1d array, shape(n, )
    :param anchors: 2d array, shape(n, 4)
    :param img_size: tuple, (h, w)
    :param scale: float, multiplied with ``self.min_size`` for the
        minimum-side filter
    :return: tuple ``(rois, scores)`` of the kept proposals and their scores
    """
    # Train and test modes use different top-N budgets around NMS.
    if self.parent_model.training:
        pre_limit = cfg.RPN_TRAIN_PRE_NMS_TOP_N
        post_limit = cfg.RPN_TRAIN_POST_NMS_TOP_N
    else:
        pre_limit = cfg.RPN_TEST_PRE_NMS_TOP_N
        post_limit = cfg.RPN_TEST_POST_NMS_TOP_N

    # Apply the regression deltas, then clip to the image boundaries.
    candidates = tools.clip_boxes(
        tools.bbox_regression(anchors, deltas), img_size)

    # Drop every box with a side smaller than the scaled minimum size.
    large_enough = tools.filter_boxes(candidates, self.min_size * scale)
    candidates = candidates[large_enough, :]
    scores = scores[large_enough]

    # Rank by descending score; a non-positive limit means "keep all".
    ranking = scores.argsort()[::-1]
    if pre_limit > 0:
        ranking = ranking[:pre_limit]
    candidates = candidates[ranking, :]
    scores = scores[ranking]

    # NMS works on [box | score] rows.
    scored_boxes = np.hstack((candidates, scores.reshape(-1, 1)))
    survivors = tools.nms(scored_boxes, self.nms_thresh)
    if post_limit > 0:
        survivors = survivors[:post_limit]

    return candidates[survivors, :], scores[survivors]
def forward(self, x, scale, nms_thresh=0.3, score_thresh=0.01):
    """Run feature extraction and the RPN, then threshold and NMS the rois.

    :param x: input batch tensor; ``x.size()[2:]`` is taken as the image
        size.
    :param scale: forwarded unchanged to ``self.rpn``.
    :param nms_thresh: threshold passed to ``tools.nms``.
    :param score_thresh: rois whose softmax probability is not above this
        value are dropped before NMS.
    :return: tuple ``(boxes, scores)`` of the rois kept after NMS and their
        probabilities.
    """
    img_size = x.size()[2:]
    feature_map = self.extractor(x)
    _, _, rois, rois_scores, _ = self.rpn(feature_map, img_size, scale)

    # Clip rois into image boundaries: columns 0/2 against img_size[0],
    # columns 1/3 against img_size[1].
    rois[:, :4:2] = np.clip(rois[:, :4:2], 0, img_size[0])
    rois[:, 1:4:2] = np.clip(rois[:, 1:4:2], 0, img_size[1])

    # Softmax on rois scores. Calling F.softmax without `dim` is deprecated
    # and emits a warning; dim=-1 is what the implicit rule picked for 1-D
    # and 2-D inputs, so results are unchanged for those.
    probs = F.softmax(torch.from_numpy(rois_scores), dim=-1)
    probs = probs.numpy()

    # Only keep rois with scores greater than the threshold.
    mask = probs > score_thresh
    boxes = rois[mask]
    scores = probs[mask]

    # NMS works on [box | score] rows.
    keep = tools.nms(np.hstack((boxes, scores.reshape(-1, 1))), nms_thresh)
    boxes = boxes[keep]
    scores = scores[keep]

    return boxes, scores
def __ro_net_detect(self, image, net_boxes, face_size):
    """Refine candidate boxes with the R-Net (24) or O-Net (48) stage.

    :param image: image object exposing ``crop((x1, y1, x2, y2))`` and
        ``resize((w, h))`` (presumably a PIL image -- confirm with caller).
    :param net_boxes: candidate boxes from the previous stage; columns 0-3
        are read as x1, y1, x2, y2.
    :param face_size: network input side length. 24 selects ``self.r_net``
        (confidence threshold 0.7), 48 selects ``self.o_net`` (confidence
        threshold 0.9999).
    :return: whatever ``tools.nms`` returns. For 24 the rows passed in are
        ``[x1, y1, x2, y2, cls]``; for 48 they additionally carry five
        ``(px, py)`` point pairs decoded from offset columns 4-13.
    :raises ValueError: if ``face_size`` is neither 24 nor 48.
    """
    # One up-front validation replaces three scattered checks (the last of
    # which was unreachable). The boolean is the third positional argument
    # of tools.nms (NOTE(review): looks like an "is_min" switch -- confirm).
    if face_size == 24:
        net, cls_threshold, nms_flag = self.r_net, 0.7, False
    elif face_size == 48:
        net, cls_threshold, nms_flag = self.o_net, 0.9999, True
    else:
        raise ValueError("face_size not in [24, 48]!")

    # Convert every candidate box to a square.
    _net_boxes = tools.convert_to_square(net_boxes)
    if len(_net_boxes) == 0:
        # torch.stack() rejects an empty list; hand NMS an empty array of
        # the width the "nothing passed the threshold" path would yield.
        n_cols = 5 if face_size == 24 else 15
        return tools.nms(np.empty((0, n_cols)), 0.3, nms_flag)

    crops = []
    for _box in _net_boxes:
        left, top, right, bottom = (int(v) for v in _box[:4])
        patch = image.crop((left, top, right, bottom))
        patch = patch.resize((face_size, face_size))
        crops.append(self.__image_transform(patch))
    # Stack the per-box tensors into a single batch tensor.
    img_dataset = torch.stack(crops).to(DEVICE)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out_cls, out_offset = net(img_dataset)
    out_cls = out_cls.cpu().detach().numpy()
    out_offset = out_offset.cpu().detach().numpy()

    # Keep only the rows whose confidence exceeds the stage threshold.
    idxs, _ = np.where(out_cls > cls_threshold)
    kept = _net_boxes[idxs]
    offsets = out_offset[idxs]

    _x1, _y1, _x2, _y2 = kept[:, 0], kept[:, 1], kept[:, 2], kept[:, 3]
    ow, oh = _x2 - _x1, _y2 - _y1

    # Box offsets are fractions of the square box's width/height.
    columns = [
        _x1 + ow * offsets[:, 0],
        _y1 + oh * offsets[:, 1],
        _x2 + ow * offsets[:, 2],
        _y2 + oh * offsets[:, 3],
        out_cls[idxs][:, 0],
    ]

    if face_size == 48:
        # Offset columns 4..13 hold five (x, y) pairs, each measured from
        # the box's top-left corner: px1, py1, ..., px5, py5.
        for col in range(4, 14, 2):
            columns.append(_x1 + ow * offsets[:, col])
            columns.append(_y1 + oh * offsets[:, col + 1])

    return tools.nms(np.stack(columns, axis=1), 0.3, nms_flag)
r"F:\workspace\7.YOLO\01.MyYolov3\data\images\3.jpg") while cap.isOpened(): ret, frame = cap.read() if ret: image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) draw = ImageDraw.Draw(image) _, _, s, pro_image = tools.convert_to_416x416(image, 416) img_tensor = img_transform(pro_image)[None, ...].cuda() out_value = detector(img_tensor, 0.6, cfg.ANCHORS_GROUP).cpu().detach() boxes = [] for j in range(cfg.CLASS_NUM): classify_mask = (out_value[..., -1] == j) _boxes = out_value[classify_mask] if _boxes.shape[0] != 0: boxes.append(tools.nms(_boxes)) for box in boxes: try: for i in box: c, xx1, yy1, xx2, yy2 = i[0:5] print(c, xx1, yy1, xx2, yy2) draw.rectangle((xx1 / s, yy1 / s, xx2 / s, yy2 / s)) except: continue frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR) cv2.imshow("", frame) cv2.waitKey(0) cv2.destroyAllWindows() cap.release()
def test(self, image, pos):
    """Run the model on one image and return the anchors kept after NMS.

    Loads a checkpoint from ``self.path`` when that path exists, runs the
    feature extractor and the rpn1/rpn2 heads, keeps the anchors whose
    class-1 probability exceeds 0.9, applies ``nms`` with threshold 0.5 and
    prints the per-anchor maximum of ``evaluation(right, pos)``.

    :param image: input tensor without a batch dimension (one is added
        with ``torch.unsqueeze(image, dim=0)``).
    :param pos: second argument of ``evaluation`` (presumably ground-truth
        boxes -- TODO confirm against ``evaluation``).
    :return: the anchors that survive NMS.
    """
    self.model.eval()
    if os.path.exists(self.path):
        checkpoint = self.load(self.path)
        self.model.load_state_dict(checkpoint['model'])
        print('************load model***************')
    self.model.eval()
    # Add the batch dimension expected by the model.
    image = torch.unsqueeze(image, dim=0)
    feature, anchors = self.model.extract(image)

    # Stage 1: take the first batch entry and update the anchors.
    classifier, regression = self.model.rpn1(feature)
    classifier = classifier[0]
    regression = regression[0]
    anchors = self.integrate(classifier, regression, anchors)

    # Stage 2: same procedure on the updated anchors.
    classifier, regression = self.model.rpn2(feature)
    classifier = classifier[0]
    regression = regression[0]
    anchors = self.integrate(classifier, regression, anchors)
    #
    # classifier, regression = self.model.rpn3(feature)
    # NOTE(review): the rpn3 call above is commented out, but the three
    # statements below still run, so the rpn2 outputs are indexed with [0]
    # a second time and integrated again -- confirm this is intended.
    classifier = classifier[0]
    regression = regression[0]
    anchors = self.integrate(classifier, regression, anchors)

    classifier = F.softmax(classifier, dim=1)

    classifier = classifier.detach().numpy()
    regression = regression.detach().numpy()

    # Indices sorted by descending class-1 probability, truncated to the
    # number of entries whose probability exceeds 0.9.
    right_indices = np.argsort(classifier[:, 1])[::-1]
    num = np.where(classifier[:, 1] > 0.9)[0].shape[0]
    print(num)
    right_indices = right_indices[:num]
    # print(right_indices)
    # right_indices_cla = [2693, 2709, 2715, 3026, 3027, 3040]

    right_anchor = anchors[right_indices]
    # right_regression and the re-indexed classifier are not used by any
    # live statement below (only by the commented-out debug lines).
    right_regression = regression[right_indices]
    classifier = classifier[right_indices]
    right = right_anchor
    # print(right_regression)
    # right = target2src(right_regression, right_anchor)

    keep = nms(right, 0.5)
    right = right[keep, :]

    # For every kept anchor, print its maximum value along axis 1 of
    # evaluation(right, pos).
    eva = evaluation(right, pos)
    eva_indices = np.argmax(eva, axis=1)
    eva = eva[np.arange(eva.shape[0]), eva_indices]
    # print(classifier[right_indices_cla, 1])
    # print(classifier[right_indices[0:10], 1])
    print(eva)
    # print(keep)
    # print(classifier[311, :])

    return right