def detect_image(self, image_id, image):
    """Detect objects in `image` and write mAP detection results for `image_id`.

    Writes one line per detection ("class score left top right bottom") to
    ./input/detection-results/<image_id>.txt.

    NOTE: mutates self.confidence (0.01) and the decoder's NMS threshold
    (0.5) so low-score boxes are kept for mAP computation.

    Returns the unmodified `image` when nothing is detected, otherwise None
    (matching the original control flow).
    """
    self.confidence = 0.01
    self.bbox_util._nms_thresh = 0.5
    # `with` guarantees the handle is closed on every path; the original
    # leaked the open file when returning early with no detections.
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])
        # Letterbox resize: pad with gray bars so the image is not distorted.
        crop_img, x_offset, y_offset = letterbox_image(
            image, [self.model_image_size[0], self.model_image_size[1]])
        photo = np.array(crop_img, dtype=np.float64)
        # Normalize and add the batch dimension (NHWC for the Keras model).
        photo = preprocess_input(
            np.reshape(photo, [
                1, self.model_image_size[0], self.model_image_size[1],
                self.model_image_size[2]
            ]))

        preds = self.retinanet_model.predict(photo)

        # Decode raw network output into (xmin, ymin, xmax, ymax, conf, label).
        results = self.bbox_util.detection_out(
            preds, self.prior, confidence_threshold=self.confidence)

        if len(results[0]) <= 0:
            return image
        results = np.array(results)

        # Keep only boxes whose score meets the confidence threshold.
        det_label = results[0][:, 5]
        det_conf = results[0][:, 4]
        det_xmin, det_ymin, det_xmax, det_ymax = (results[0][:, 0],
                                                  results[0][:, 1],
                                                  results[0][:, 2],
                                                  results[0][:, 3])
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        # Map box coordinates back onto the original image (remove gray bars).
        boxes = retinanet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            # score[:6] truncates the stringified score to 6 characters.
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)),
                     str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect_image(self, image_id, image):
    """Detect objects in `image` and write mAP detection results for `image_id`.

    Writes one line per detection ("class score left top right bottom") to
    ./input/detection-results/<image_id>.txt.

    NOTE: mutates self.confidence (0.01) and self.iou (0.5) so low-score
    boxes are kept for mAP computation.

    Returns the unmodified `image` when nothing is detected, otherwise None
    (matching the original control flow).
    """
    self.confidence = 0.01
    self.iou = 0.5
    # `with` closes the file on every path; the original leaked the handle
    # when the except branch returned early with no detections.
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])
        # Letterbox resize (gray padding, no distortion), normalize, then
        # move channels first for PyTorch (HWC -> CHW).
        crop_img = np.array(letterbox_image(image, self.image_size))
        photo = np.array(crop_img, dtype=np.float32)
        photo = np.transpose(preprocess_input(photo), (2, 0, 1))
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            _, regression, classification, anchors = self.net(images)
            # Decode raw regression output into boxes, append class scores.
            regression = decodebox(regression, anchors, images)
            detection = torch.cat([regression, classification], axis=-1)
            # NOTE(review): nms_thres is hard-coded to 0.3 even though
            # self.iou was just set to 0.5 above — confirm which threshold
            # is intended before unifying them.
            batch_detections = non_max_suppression(detection,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            # No detections (presumably a None entry from NMS — verify);
            # leave the (empty) results file and return the original image.
            # Narrowed from a bare `except:` so Ctrl-C is not swallowed.
            return image

        # Keep only boxes scoring above the confidence threshold.
        top_index = batch_detections[:, 4] > self.confidence
        top_conf = batch_detections[top_index, 4]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Map box coordinates back onto the original image (remove gray bars).
        boxes = retinanet_correct_boxes(top_ymin, top_xmin, top_ymax,
                                        top_xmax, np.array(self.image_size),
                                        image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            # score[:6] truncates the stringified score to 6 characters.
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)),
                     str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect_image(self, image):
    """Detect objects in a PIL `image` and draw labeled boxes on it.

    Returns the original image unchanged when nothing is detected,
    otherwise the same image with boxes and class labels drawn in place.
    """
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Add gray bars so the resize does not distort the image.
    #---------------------------------------------------------#
    crop_img = np.array(
        letterbox_image(image,
                        [self.input_shape[1], self.input_shape[0]]))
    photo = np.array(crop_img, dtype=np.float32)
    # HWC -> CHW for PyTorch.
    photo = np.transpose(preprocess_input(photo), (2, 0, 1))

    with torch.no_grad():
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()
        #---------------------------------------------------------#
        #   Forward pass through the network.
        #---------------------------------------------------------#
        _, regression, classification, anchors = self.net(images)
        #-----------------------------------------------------------#
        #   Decode the raw predictions into boxes, append class scores.
        #-----------------------------------------------------------#
        regression = decodebox(regression, anchors, images)
        detection = torch.cat([regression, classification], axis=-1)
        batch_detections = non_max_suppression(detection,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)
    #--------------------------------------#
    #   If nothing was detected, return the original image.
    #   (Presumably NMS yields a None entry here — verify.)
    #--------------------------------------#
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except:
        return image
    #-----------------------------------------------------------#
    #   Keep only boxes scoring above the confidence threshold.
    #-----------------------------------------------------------#
    top_index = batch_detections[:, 4] > self.confidence
    top_conf = batch_detections[top_index, 4]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
        top_bboxes[:, 0], -1), np.expand_dims(
            top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)
    #-----------------------------------------------------------#
    #   Map box coordinates back to the original image
    #   (remove the gray letterbox padding).
    #-----------------------------------------------------------#
    boxes = retinanet_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([self.input_shape[0], self.input_shape[1]]), image_shape)

    # Font size and rectangle thickness scale with the image size.
    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * np.shape(image)[1] +
                                            0.5).astype('int32'))
    thickness = max(
        (np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0], 1)

    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]

        top, left, bottom, right = boxes[i]
        # Expand each box by 5 px on every side, then clamp to the image.
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5

        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0],
                     np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1],
                    np.floor(right + 0.5).astype('int32'))

        # Draw the box and its "<class> <score>" label.
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label, top, left, bottom, right)

        # Place the label above the box when it fits, else just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # NOTE(review): this inner loop shadows the outer `i`; harmless
        # here because enumerate reassigns `i` each iteration, but fragile.
        for i in range(thickness):
            # Nested rectangles emulate a thick outline.
            draw.rectangle([left + i, top + i, right - i, bottom - i],
                           outline=self.colors[self.class_names.index(
                               predicted_class)])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[self.class_names.index(predicted_class)])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw
    return image
def detect_image(self, image):
    """Detect objects in a PIL `image` (Keras model) and draw labeled boxes.

    Returns the original image unchanged when nothing is detected,
    otherwise the same image with boxes and class labels drawn in place.
    """
    image_shape = np.array(np.shape(image)[0:2])
    # Letterbox resize: pad with gray bars so the image is not distorted.
    crop_img, x_offset, y_offset = letterbox_image(
        image, [self.model_image_size[0], self.model_image_size[1]])
    photo = np.array(crop_img, dtype=np.float64)
    # Normalize and add the batch dimension (NHWC for the Keras model).
    photo = preprocess_input(
        np.reshape(photo, [
            1, self.model_image_size[0], self.model_image_size[1],
            self.model_image_size[2]
        ]))
    preds = self.retinanet_model.predict(photo)
    # Decode raw network output into (xmin, ymin, xmax, ymax, conf, label).
    results = self.bbox_util.detection_out(
        preds, self.prior, confidence_threshold=self.confidence)

    # Nothing detected: return the original image unchanged.
    if len(results[0]) <= 0:
        return image
    results = np.array(results)

    # Keep only boxes whose score meets the confidence threshold.
    det_label = results[0][:, 5]
    det_conf = results[0][:, 4]
    det_xmin, det_ymin, det_xmax, det_ymax = results[0][:, 0], results[
        0][:, 1], results[0][:, 2], results[0][:, 3]
    top_indices = [
        i for i, conf in enumerate(det_conf) if conf >= self.confidence
    ]
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
        det_xmin[top_indices],
        -1), np.expand_dims(det_ymin[top_indices], -1), np.expand_dims(
            det_xmax[top_indices],
            -1), np.expand_dims(det_ymax[top_indices], -1)

    # Map box coordinates back to the original image (remove gray bars).
    boxes = retinanet_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([self.model_image_size[0], self.model_image_size[1]]),
        image_shape)

    # Font size and rectangle thickness scale with the image size.
    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * np.shape(image)[1] +
                                            0.5).astype('int32'))
    # NOTE(review): unlike the torch variant, this is not clamped with
    # max(..., 1) — thickness can be 0 for small images, drawing no outline.
    thickness = (np.shape(image)[0] +
                 np.shape(image)[1]) // self.model_image_size[0]

    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = top_conf[i]

        top, left, bottom, right = boxes[i]
        # Expand each box by 5 px on every side, then clamp to the image.
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5

        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0],
                     np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1],
                    np.floor(right + 0.5).astype('int32'))

        # Draw the box and its "<class> <score>" label.
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label)

        # Place the label above the box when it fits, else just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # NOTE(review): this inner loop shadows the outer `i`; harmless
        # here because enumerate reassigns `i` each iteration, but fragile.
        for i in range(thickness):
            # Nested rectangles emulate a thick outline.
            draw.rectangle([left + i, top + i, right - i, bottom - i],
                           outline=self.colors[int(c)])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[int(c)])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw
    return image
def get_FPS(self, image, test_interval):
    """Benchmark inference: return average seconds per image.

    Runs the full predict/decode/NMS pipeline once as a warm-up, then
    `test_interval` more times under a timer. The exception handlers are
    deliberate best-effort: a failed post-processing step (e.g. no
    detections) must not abort the timing loop.
    """
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Add gray bars so the resize does not distort the image.
    #---------------------------------------------------------#
    crop_img = np.array(
        letterbox_image(image,
                        [self.input_shape[1], self.input_shape[0]]))
    photo = np.array(crop_img, dtype=np.float32)
    # HWC -> CHW for PyTorch.
    photo = np.transpose(preprocess_input(photo), (2, 0, 1))

    # Warm-up pass (not timed): runs the identical pipeline once.
    with torch.no_grad():
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        _, regression, classification, anchors = self.net(images)
        regression = decodebox(regression, anchors, images)
        detection = torch.cat([regression, classification], axis=-1)
        batch_detections = non_max_suppression(detection,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
            top_index = batch_detections[:, 4] > self.confidence
            top_conf = batch_detections[top_index, 4]
            top_label = np.array(batch_detections[top_index, -1], np.int32)
            top_bboxes = np.array(batch_detections[top_index, :4])
            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                top_bboxes[:, 0], -1), np.expand_dims(
                    top_bboxes[:, 1], -1), np.expand_dims(
                        top_bboxes[:, 2],
                        -1), np.expand_dims(top_bboxes[:, 3], -1)
            boxes = retinanet_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.input_shape[0], self.input_shape[1]]),
                image_shape)
        except:
            # Best-effort: no detections should not abort the benchmark.
            pass

    t1 = time.time()
    for _ in range(test_interval):
        with torch.no_grad():
            _, regression, classification, anchors = self.net(images)
            regression = decodebox(regression, anchors, images)
            detection = torch.cat([regression, classification], axis=-1)
            batch_detections = non_max_suppression(
                detection,
                len(self.class_names),
                conf_thres=self.confidence,
                nms_thres=self.iou)
            try:
                batch_detections = batch_detections[0].cpu().numpy()
                top_index = batch_detections[:, 4] > self.confidence
                top_conf = batch_detections[top_index, 4]
                top_label = np.array(batch_detections[top_index, -1],
                                     np.int32)
                top_bboxes = np.array(batch_detections[top_index, :4])
                top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                    top_bboxes[:, 0], -1), np.expand_dims(
                        top_bboxes[:, 1], -1), np.expand_dims(
                            top_bboxes[:, 2],
                            -1), np.expand_dims(top_bboxes[:, 3], -1)
                boxes = retinanet_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array([self.input_shape[0], self.input_shape[1]]),
                    image_shape)
            except:
                # Best-effort: see warm-up pass above.
                pass
    t2 = time.time()
    # Average wall-clock time per full inference pass.
    tact_time = (t2 - t1) / test_interval
    return tact_time