def __parse_annotation(self, annotation):
    """
    Load the image and bounding boxes described by one annotation line.

    The image at ``image_path`` is read, passed through the random
    augmentations (horizontal flip, crop, translate) and finally handed to
    ``img_preprocess2`` together with the training input size. Per the
    original documentation, that resize keeps the aspect ratio and corrects
    the box coordinates so every box keeps the same relative position in
    the resized image as it had in the source image.

    :param annotation: image path followed by whitespace-separated boxes,
        each box being comma-separated integers, e.g.
        ``image_path xmin,ymin,xmax,ymax,class_ind xmin,ymin,xmax,ymax,class_ind ...``
    :return: ``(image, bboxes)``; per the original documentation ``bboxes``
        has shape (N, 5) where N is the number of boxes in the image and
        5 stands for (xmin, ymin, xmax, ymax, class_ind)
    :raises IOError: if the image file cannot be read
    """
    fields = annotation.split()
    image_path = fields[0]

    # cv2.imread signals failure by returning None instead of raising;
    # fail here with the offending path rather than deep inside augmentation.
    image = cv2.imread(image_path)
    if image is None:
        raise IOError('Failed to read image: %s' % image_path)

    # map() is lazy on Python 3, so each box must be materialised as a list;
    # otherwise np.array would hold map objects instead of integers.
    bboxes = np.array([list(map(int, box.split(','))) for box in fields[1:]])

    # Data augmentation. Copies are passed in case a step works in place.
    image, bboxes = data_aug.random_horizontal_flip(
        np.copy(image), np.copy(bboxes))
    image, bboxes = data_aug.random_crop(np.copy(image), np.copy(bboxes))
    image, bboxes = data_aug.random_translate(np.copy(image), np.copy(bboxes))

    # Resize to the square training input size (aspect ratio preserved,
    # according to the original documentation).
    image, bboxes = img_preprocess2(
        np.copy(image), np.copy(bboxes),
        (self.__train_input_size, self.__train_input_size), True)
    return image, bboxes
def __get_bbox(self, image):
    """
    Run the network on one image and return the boxes that survive NMS.

    :param image: the image to run prediction on
    :return: bboxes after NMS; per the original documentation each row is
        stored as (xmin, ymin, xmax, ymax, score, class)
    """
    # The original image size is forwarded to __convert_pred for every scale.
    org_h, org_w, _ = np.copy(image).shape

    side = self.__test_input_size
    net_input = utils.img_preprocess2(image, None, (side, side), False)
    # Add a leading axis of length 1 before feeding the network.
    net_input = net_input[np.newaxis, ...]

    pred_s, pred_m, pred_l = self.__sess.run(
        [self.__pred_sbbox, self.__pred_mbbox, self.__pred_lbbox],
        feed_dict={
            self.__input_data: net_input,
            self.__training: False,
        },
    )

    # Convert each output with the valid scale at the matching index.
    # NOTE(review): __predict calls __convert_pred with an additional
    # test_input_size argument -- confirm which signature is current.
    per_scale = [
        self.__convert_pred(pred, (org_h, org_w), self.__valid_scales[idx])
        for idx, pred in enumerate((pred_s, pred_m, pred_l))
    ]

    merged = np.concatenate(per_scale, axis=0)
    return utils.nms(merged, self.__score_threshold, self.__iou_threshold,
                     method='nms')
def __predict(self, image, test_input_size, valid_scale):
    """
    Run the network on one image at a given input size and convert the output.

    :param image: the image to run prediction on; its shape must unpack
        into three values, the first two being used as height and width
    :param test_input_size: side length of the square size handed to
        ``utils.img_preprocess2``; also forwarded to ``__convert_pred``
    :param valid_scale: forwarded unchanged to ``__convert_pred``
    :return: whatever ``__convert_pred`` returns for the concatenated
        predictions of the three network outputs
    """
    org_h, org_w, _ = np.copy(image).shape

    target_size = (test_input_size, test_input_size)
    net_input = utils.img_preprocess2(image, None, target_size, False)
    # Add a leading axis of length 1 before feeding the network.
    net_input = net_input[np.newaxis, ...]

    pred_s, pred_m, pred_l = self.__sess.run(
        [self.__pred_sbbox, self.__pred_mbbox, self.__pred_lbbox],
        feed_dict={
            self.__input_data: net_input,
            self.__training: False,
        },
    )

    # Flatten every output to rows of width 5 + num_classes, then stack the
    # three outputs into one array.
    row_width = 5 + self.__num_classes
    flattened = [np.reshape(pred, (-1, row_width))
                 for pred in (pred_s, pred_m, pred_l)]
    stacked = np.concatenate(flattened, axis=0)

    # NOTE(review): __get_bbox calls __convert_pred without the
    # test_input_size argument -- confirm which signature is current.
    return self.__convert_pred(stacked, test_input_size, (org_h, org_w),
                               valid_scale)