def get_FPS(self, image, test_interval):
    """Return the mean duration, in seconds, of one detection pass.

    The picture is preprocessed once. One untimed pass is then run,
    followed by ``test_interval`` timed passes. A pass consists of the
    ``self.get_pred`` call, the optional ``nms`` step, the confidence
    filter and the letterbox box correction; its results are discarded.

    Args:
        image: Input picture; the first two entries of its ``np.shape``
            are used as the original image shape.
        test_interval: Number of timed passes to average over.

    Returns:
        ``(t2 - t1) / test_interval`` measured with ``time.time()``.
    """
    image_shape = np.array(np.shape(image)[0:2])
    net_h, net_w = self.input_shape[0], self.input_shape[1]

    # Pad the picture with grey bars so it is resized without distortion.
    crop_img = letterbox_image(image, [net_h, net_w])
    # Reverse the channel axis (RGB -> BGR): per the original note, the
    # centernet_hourglass weights were trained on BGR pictures.
    photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    # Normalise and reshape to a single-picture batch.
    photo = np.reshape(
        preprocess_image(photo),
        [1, net_h, net_w, self.input_shape[2]])

    def run_pass():
        # One forward call followed by the complete post-processing chain.
        preds = self.get_pred(photo).numpy()
        if self.nms:
            preds = np.array(nms(preds, self.nms_threhold))
        if len(preds[0]) <= 0:
            return

        # Divide the box columns by a quarter of the input size.
        preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)
        dets = preds[0]
        det_label = dets[:, -1]
        det_conf = dets[:, -2]
        det_xmin, det_ymin = dets[:, 0], dets[:, 1]
        det_xmax, det_ymax = dets[:, 2], dets[:, 3]

        # Keep only the rows whose score reaches the threshold.
        keep = [
            row for row, conf in enumerate(det_conf)
            if conf >= self.confidence
        ]
        top_conf = det_conf[keep]
        top_label_indices = det_label[keep].tolist()
        top_xmin = np.expand_dims(det_xmin[keep], -1)
        top_ymin = np.expand_dims(det_ymin[keep], -1)
        top_xmax = np.expand_dims(det_xmax[keep], -1)
        top_ymax = np.expand_dims(det_ymax[keep], -1)

        # Remove the grey-bar offset from the boxes.
        boxes = centernet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.input_shape[0], self.input_shape[1]]),
            image_shape)

    # Untimed first pass.
    run_pass()

    t1 = time.time()
    for _ in range(test_interval):
        run_pass()
    t2 = time.time()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def detect_image(self, image):
    """Detect faces in ``image`` and draw boxes, scores and landmarks.

    Args:
        image: Picture array. A copy is kept for drawing and the picture
            is converted to ``float32`` for inference.

    Returns:
        The copy of the input with every detection drawn on it, or the
        unmodified copy when ``detection_out`` yields no results.
    """
    # Keep a copy of the input; all drawing happens on this copy.
    canvas = image.copy()

    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    # Multipliers applied to the predictions below: (width, height)
    # repeated for the two box corners and the five landmarks.
    box_scale = [im_width, im_height] * 2
    landmark_scale = [im_width, im_height] * 5

    if self.letterbox_image:
        # Pad with grey bars so the picture is resized without distortion.
        image = letterbox_image(
            image, [self.input_shape[1], self.input_shape[0]])
    else:
        # Without letterboxing the anchors are rebuilt for this picture.
        self.anchors = Anchors(
            self.cfg, image_size=(im_height, im_width)).get_anchors()

    # Preprocess, add the batch dimension and run the network.
    photo = np.expand_dims(preprocess_input(image), 0)
    preds = self.retinaface.predict(photo)

    # Decode the raw predictions.
    results = self.bbox_util.detection_out(
        preds, self.anchors, confidence_threshold=self.confidence)

    # Nothing detected: hand back the untouched copy.
    if len(results) <= 0:
        return canvas
    results = np.array(results)

    # With letterboxing, strip the grey-bar part from the coordinates.
    if self.letterbox_image:
        results = retinaface_correct_boxes(
            results,
            np.array([self.input_shape[0], self.input_shape[1]]),
            np.array([im_height, im_width]))

    results[:, :4] = results[:, :4] * box_scale
    results[:, 5:] = results[:, 5:] * landmark_scale

    landmark_colors = (
        (0, 0, 255),
        (0, 255, 255),
        (255, 0, 255),
        (0, 255, 0),
        (255, 0, 0),
    )
    for face in results:
        # Format the score before the row is truncated to integers.
        text = "{:.4f}".format(face[4])
        b = [int(value) for value in face]

        # b[0]-b[3] are the face box corners, b[4] is the score.
        cv2.rectangle(canvas, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cv2.putText(canvas, text, (b[0], b[1] + 12),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
        print(b[0], b[1], b[2], b[3], b[4])

        # b[5]-b[14] are the five landmark (x, y) pairs.
        for k, color in enumerate(landmark_colors):
            cv2.circle(canvas, (b[5 + 2 * k], b[6 + 2 * k]), 1, color, 4)
    return canvas
def detect_image(self, image):
    """Run YOLO detection on ``image`` and draw the results onto it.

    Args:
        image: Picture to analyse. It is drawn on in place through
            ``ImageDraw.Draw``.

    Returns:
        ``image`` alone when no detection is available or none passes the
        confidence filter. Otherwise the tuple ``(image, oo, class_score)``
        where ``oo`` is ``[bottom - top, right - left]`` of the last drawn
        box and ``class_score`` maps the last predicted class name to its
        score formatted as ``'%.2f'``.
    """
    image_shape = np.array(np.shape(image)[0:2])
    model_h, model_w = self.model_image_size[0], self.model_image_size[1]

    # Letterbox, scale to [0, 1], move channels first, add a batch axis.
    crop_img = np.array(letterbox_image(image, (model_h, model_w)))
    photo = np.array(crop_img, dtype=np.float32)
    photo /= 255.0
    photo = np.transpose(photo, (2, 0, 1)).astype(np.float32)
    images = np.asarray([photo])

    with torch.no_grad():
        images = torch.from_numpy(images)
        if self.cuda:
            images = images.cuda()
        outputs = self.net(images)

        # Decode the three output heads, stack them and run NMS.
        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)

    # Only the "no usable detections" failures are treated as an empty
    # result; other errors (e.g. device failures) now propagate instead
    # of being hidden by a bare ``except``.
    # NOTE(review): presumably the first entry is None when nothing is
    # detected -- confirm against non_max_suppression.
    try:
        detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        return image

    # Score = column 4 * column 5; keep rows above the threshold.
    scores = detections[:, 4] * detections[:, 5]
    top_index = scores > self.confidence
    top_conf = scores[top_index]
    top_label = np.array(detections[top_index, -1], np.int32)
    top_bboxes = np.array(detections[top_index, :4])
    top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
    top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
    top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
    top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

    # Remove the grey bars added by the letterbox step.
    boxes = yolo_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([model_h, model_w]), image_shape)

    img_h, img_w = np.shape(image)[0], np.shape(image)[1]
    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * img_w + 0.5).astype('int32'))
    # At least one pixel, otherwise small pictures get no outline at all.
    thickness = max((img_h + img_w) // model_h, 1)

    oo = None
    class_score = None
    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]
        color = self.colors[self.class_names.index(predicted_class)]

        # Grow the box by 5 px per side, round, and clamp to the picture.
        top, left, bottom, right = boxes[i]
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

        # Height and width of the drawn box.
        oo = [bottom - top, right - left]

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        # NOTE(review): ImageDraw.textsize was removed in Pillow 10;
        # confirm the pinned Pillow version.
        label_size = draw.textsize(label, font)
        class_score = {predicted_class: '%.2f' % score}

        # Put the label above the box when it fits, else just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw

    # No box was drawn: keep the original single-value return.
    if oo is None:
        return image
    return image, oo, class_score
def get_FPS(self, image, test_interval):
    """Return the mean duration, in seconds, of one detection pass.

    The picture is preprocessed once. One untimed pass is then run,
    followed by ``test_interval`` timed passes. A pass covers the network
    call, decoding, NMS, score filtering and box correction; its results
    are discarded.

    Args:
        image: Input picture. When ``self.letterbox_image`` is false it
            must provide ``convert`` and ``resize`` as used below.
        test_interval: Number of timed passes to average over.

    Returns:
        ``(t2 - t1) / test_interval`` measured with ``time.time()``.
    """
    image_shape = np.array(np.shape(image)[0:2])
    model_h, model_w = self.model_image_size[0], self.model_image_size[1]

    # Either pad with grey bars (distortion-free resize) or resize directly.
    if self.letterbox_image:
        crop_img = np.array(letterbox_image(image, (model_w, model_h)))
    else:
        crop_img = image.convert('RGB')
        crop_img = crop_img.resize((model_w, model_h), Image.BICUBIC)
    photo = np.array(crop_img, dtype=np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))

    # Add the batch dimension and move to the GPU when requested.
    images = torch.from_numpy(np.asarray([photo]))
    if self.cuda:
        images = images.cuda()

    def run_pass():
        # One forward call followed by the complete post-processing chain.
        with torch.no_grad():
            outputs = self.net(images)
            output_list = [
                self.yolo_decodes[i](outputs[i]) for i in range(3)
            ]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence, nms_thres=self.iou)

        # Only the "no usable detections" failures end the pass early.
        # Errors in the steps below are no longer swallowed by a blanket
        # ``except: pass``.
        # NOTE(review): presumably the first entry is None when nothing
        # is detected -- confirm against non_max_suppression.
        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return

        top_index = detections[:, 4] * detections[:, 5] > self.confidence
        top_conf = detections[top_index, 4] * detections[top_index, 5]
        top_label = np.array(detections[top_index, -1], np.int32)
        top_bboxes = np.array(detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        if self.letterbox_image:
            # Remove the grey-bar offset from the boxes.
            boxes = yolo_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.model_image_size[0],
                          self.model_image_size[1]]),
                image_shape)
        else:
            # Rescale from network-input size to the original picture.
            top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
            top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
            top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
            top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
            boxes = np.concatenate(
                [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

    # Untimed first pass.
    run_pass()

    t1 = time.time()
    for _ in range(test_interval):
        run_pass()
    t2 = time.time()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def detect_image(self, image):
    """Detect objects in ``image`` via the session and draw them on it.

    Args:
        image: Picture exposing ``size`` as ``(width, height)``; it is
            drawn on in place.

    Returns:
        The same ``image`` object after drawing.
    """
    started = timer()

    # Letterbox to the network input size, scale to [0, 1], add a batch axis.
    target_size = (self.model_image_size[0], self.model_image_size[1])
    image_data = np.array(letterbox_image(image, target_size),
                          dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)

    # Run the graph.
    fetches = [self.boxes, self.scores, self.classes]
    feeds = {
        self.yolo_model.input: image_data,
        self.input_image_shape: [image.size[1], image.size[0]],
        K.learning_phase(): 0,
    }
    out_boxes, out_scores, out_classes = self.sess.run(fetches,
                                                       feed_dict=feeds)

    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    # Font size and outline thickness both scale with the picture size.
    font = ImageFont.truetype(
        font='font/simhei.ttf',
        size=np.floor(2e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 500

    for idx, cls in enumerate(out_classes):
        predicted_class = self.class_names[cls]
        score = out_scores[idx]

        # Grow the box by 5 px per side, round, and clamp to the picture.
        top, left, bottom, right = out_boxes[idx]
        top = max(0, np.floor((top - 5) + 0.5).astype('int32'))
        left = max(0, np.floor((left - 5) + 0.5).astype('int32'))
        bottom = min(image.size[1],
                     np.floor((bottom + 5) + 0.5).astype('int32'))
        right = min(image.size[0],
                    np.floor((right + 5) + 0.5).astype('int32'))

        label_text = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label_text, font)
        # The UTF-8 encoded label is what gets printed.
        label_bytes = label_text.encode('utf-8')
        print(label_bytes)

        # Put the label above the box when it fits, else just inside it.
        fits_above = top - label_size[1] >= 0
        text_origin = np.array(
            [left, top - label_size[1]] if fits_above else [left, top + 1])

        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[cls])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[cls])
        draw.text(text_origin, str(label_bytes, 'UTF-8'),
                  fill=(0, 0, 0), font=font)
        del draw

    finished = timer()
    print('detect time:', finished - started)
    return image
def detect_image(self, image_id, image):
    """Write the detections for ``image`` to a per-image result file.

    The file ``./input/detection-results/<image_id>.txt`` is created or
    truncated on every call. Each kept detection adds one line of the
    form ``class score left top right bottom``.

    Side effects:
        Sets ``self.confidence`` to 0.01 and ``self.iou`` to 0.5.

    Args:
        image_id: Name (without extension) used for the result file.
        image: Picture to analyse.

    Returns:
        ``image`` when no detection is available, otherwise ``None``.
    """
    self.confidence = 0.01
    self.iou = 0.5

    # The context manager closes the file on every exit path, including
    # the early return below, which previously leaked the handle.
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])

        # Pad with grey bars so the picture is resized without distortion.
        crop_img = np.array(
            letterbox_image(
                image,
                (self.model_image_size[1], self.model_image_size[0])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))

        # Add the batch dimension.
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            # Feed the picture through the network.
            outputs = self.net(images)
            output_list = [
                self.yolo_decodes[i](outputs[i]) for i in range(3)
            ]

            # Stack the decoded boxes, then apply non-maximum suppression.
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence, nms_thres=self.iou)

        # Nothing detected: return the picture. Only the failures that
        # signal "no usable detections" are caught; other errors propagate.
        # NOTE(review): presumably the first entry is None when nothing
        # is detected -- confirm against non_max_suppression.
        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        # Filter the boxes by score (column 4 * column 5).
        scores = detections[:, 4] * detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(detections[top_index, -1], np.int32)
        top_bboxes = np.array(detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # The boxes are relative to the letterboxed picture; remove the
        # grey-bar part so they refer to the original picture.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            # Only the first six characters of the score are written.
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)),
                     str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect_image(self, image):
    """Run detection on ``image`` and draw labelled boxes onto it.

    Args:
        image: Picture to analyse. It is drawn on in place through
            ``ImageDraw.Draw``.

    Returns:
        The same ``image`` object, annotated when detections pass the
        confidence filter and untouched otherwise.
    """
    image_shape = np.array(np.shape(image)[0:2])

    # Letterbox, preprocess, move channels first, add a batch axis.
    crop_img = np.array(letterbox_image(image, self.image_size))
    photo = np.array(crop_img, dtype=np.float32)
    photo = np.transpose(preprocess_input(photo), (2, 0, 1))
    images = np.asarray([photo])

    with torch.no_grad():
        images = torch.from_numpy(images)
        if self.cuda:
            images = images.cuda()
        _, regression, classification, anchors = self.net(images)

        # Decode the regression output, append the class scores, run NMS.
        regression = decodebox(regression, anchors, images)
        detection = torch.cat([regression, classification], axis=-1)
        batch_detections = non_max_suppression(detection,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)

    # Only the "no usable detections" failures are treated as an empty
    # result; other errors now propagate instead of being hidden by a
    # bare ``except``.
    # NOTE(review): presumably the first entry is None when nothing is
    # detected -- confirm against non_max_suppression.
    try:
        detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        return image

    top_index = detections[:, 4] > self.confidence
    top_conf = detections[top_index, 4]
    top_label = np.array(detections[top_index, -1], np.int32)
    top_bboxes = np.array(detections[top_index, :4])
    top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
    top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
    top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
    top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

    # Remove the grey bars added by the letterbox step.
    boxes = retinanet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                    np.array(self.image_size), image_shape)

    img_h, img_w = np.shape(image)[0], np.shape(image)[1]
    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * img_w + 0.5).astype('int32'))
    # At least one pixel, otherwise small pictures get no outline at all.
    thickness = max((img_h + img_w) // self.image_size[0], 1)

    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]
        color = self.colors[self.class_names.index(predicted_class)]

        # Grow the box by 5 px per side, round, and clamp to the picture.
        top, left, bottom, right = boxes[i]
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        # NOTE(review): ImageDraw.textsize was removed in Pillow 10;
        # confirm the pinned Pillow version.
        label_size = draw.textsize(label, font)

        # Put the label above the box when it fits, else just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw
    return image
def detect_image(self, image_id, image):
    """Write the detections for ``image`` to a per-image result file.

    The file ``./input/detection-results/<image_id>.txt`` is created or
    truncated on every call. Each kept detection adds one line of the
    form ``class score left top right bottom``.

    Side effects:
        Sets ``self.confidence`` to 0.01 and ``self.nms_threhold`` to 0.5.

    Args:
        image_id: Name (without extension) used for the result file.
        image: Picture to analyse.

    Returns:
        ``None``.
    """
    # The context manager closes the file on every exit path, including
    # the early return for "no predictions", which previously leaked it.
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        self.confidence = 0.01
        self.nms_threhold = 0.5
        image_shape = np.array(np.shape(image)[0:2])

        # Pad with grey bars so the picture is resized without distortion.
        crop_img = letterbox_image(
            image, [self.input_shape[0], self.input_shape[1]])
        # Reverse the channel axis (RGB -> BGR): per the original note,
        # the centernet_hourglass weights were trained on BGR pictures.
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
        # Normalise and reshape to a single-picture batch.
        photo = np.reshape(
            preprocess_image(photo),
            [1, self.input_shape[0], self.input_shape[1],
             self.input_shape[2]])

        preds = self.get_pred(photo).numpy()

        # Original author's rationale: for CenterNet the centre point
        # matters a lot, and a large object yields many local responses,
        # so max-pooling based suppression cannot remove the partial
        # boxes. An extra box-level NMS is therefore applied; it matters
        # little with an hourglass backbone and noticeably with resnet.
        if self.nms:
            preds = np.array(nms(preds, self.nms_threhold))

        if len(preds[0]) <= 0:
            return

        # Divide the box columns by a quarter of the input size.
        preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)
        dets = preds[0]
        det_label = dets[:, -1]
        det_conf = dets[:, -2]
        det_xmin, det_ymin = dets[:, 0], dets[:, 1]
        det_xmax, det_ymax = dets[:, 2], dets[:, 3]

        # Keep the boxes whose score reaches the confidence threshold.
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        # Remove the grey-bar part from the boxes.
        boxes = centernet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.input_shape[0], self.input_shape[1]]),
            image_shape)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            # Only the first six characters of the score are written.
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)),
                     str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect_image(self, image):
    """Run CenterNet detection on ``image`` and draw the results onto it.

    Args:
        image: Picture to analyse. It is drawn on in place through
            ``ImageDraw.Draw``.

    Returns:
        The same ``image`` object, annotated when detections are found
        and untouched when the first decoded output is empty.
    """
    image_shape = np.array(np.shape(image)[0:2])

    # Pad with grey bars so the picture is resized without distortion.
    crop_img = letterbox_image(image,
                               [self.image_size[0], self.image_size[1]])
    # Reverse the channel axis (RGB -> BGR): per the original note, the
    # centernet_hourglass weights were trained on BGR pictures.
    photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    # Normalise, move channels first and reshape to a one-picture batch.
    photo = np.reshape(
        np.transpose(preprocess_image(photo), (2, 0, 1)),
        [1, self.image_size[2], self.image_size[0], self.image_size[1]])

    with torch.no_grad():
        images = Variable(
            torch.from_numpy(np.asarray(photo)).type(torch.FloatTensor))
        if self.cuda:
            images = images.cuda()

        outputs = self.centernet(images)
        if self.backbone == 'hourglass':
            # Use the last stack's heads; the heatmap goes through sigmoid.
            outputs = [
                outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"],
                outputs[-1]["reg"]
            ]

        # Decode the raw heads into boxes.
        outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                              self.image_size, self.confidence, self.cuda)

        # Original author's rationale: for CenterNet the centre point
        # matters a lot, and a large object yields many local responses,
        # so max-pooling based suppression cannot remove the partial
        # boxes. An extra box-level NMS is therefore applied; it matters
        # little with an hourglass backbone and noticeably with resnet.
        #
        # The NMS stays best-effort, but only ordinary errors are
        # ignored now: a bare ``except`` also trapped KeyboardInterrupt
        # and SystemExit.
        # NOTE(review): the exact failure this guards against is not
        # visible here; narrow further once it is known.
        try:
            if self.nms:
                outputs = np.array(nms(outputs, self.nms_threhold))
        except Exception:
            pass

    output = outputs[0]
    if len(output) <= 0:
        return image

    batch_boxes, det_conf, det_label = output[:, :4], output[:, 4], output[:, 5]
    det_xmin, det_ymin = batch_boxes[:, 0], batch_boxes[:, 1]
    det_xmax, det_ymax = batch_boxes[:, 2], batch_boxes[:, 3]

    # Keep the boxes whose score reaches the confidence threshold.
    top_indices = [
        i for i, conf in enumerate(det_conf) if conf >= self.confidence
    ]
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin = np.expand_dims(det_xmin[top_indices], -1)
    top_ymin = np.expand_dims(det_ymin[top_indices], -1)
    top_xmax = np.expand_dims(det_xmax[top_indices], -1)
    top_ymax = np.expand_dims(det_ymax[top_indices], -1)

    # Remove the grey-bar part from the boxes.
    boxes = centernet_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([self.image_size[0], self.image_size[1]]), image_shape)

    img_h, img_w = np.shape(image)[0], np.shape(image)[1]
    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * img_w + 0.5).astype('int32'))
    thickness = max((img_h + img_w) // self.image_size[0], 1)

    for i, c in enumerate(top_label_indices):
        class_index = int(c)
        predicted_class = self.class_names[class_index]
        score = top_conf[i]

        # Grow the box by 5 px per side, round, and clamp to the picture.
        top, left, bottom, right = boxes[i]
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        # NOTE(review): ImageDraw.textsize was removed in Pillow 10;
        # confirm the pinned Pillow version.
        label_size = draw.textsize(label, font)
        # The console output keeps showing the UTF-8 encoded label.
        print(label.encode('utf-8'), top, left, bottom, right)

        # Put the label above the box when it fits, else just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A dedicated loop name avoids shadowing the detection index.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[class_index])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[class_index])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw
    return image
def detect_image(self, image):
    """Detect objects in ``image`` and draw labelled boxes onto it.

    Inference goes through ``self.yolo_model.predict`` when ``self.eager``
    is truthy and through ``self.sess.run`` otherwise.

    Args:
        image: Picture exposing ``size`` as ``(width, height)``; it is
            drawn on in place.

    Returns:
        The same ``image`` object after drawing.
    """
    started = timer()

    # Resize the picture so that it matches the network input.
    target_size = (self.model_image_size[1], self.model_image_size[0])
    image_data = np.array(letterbox_image(image, target_size),
                          dtype='float32')
    image_data /= 255.
    # Add the batch dimension.
    image_data = np.expand_dims(image_data, 0)

    if self.eager:
        # Eager path: the original picture shape is a second model input.
        input_image_shape = np.expand_dims(
            np.array([image.size[1], image.size[0]], dtype='float32'), 0)
        out_boxes, out_scores, out_classes = self.yolo_model.predict(
            [image_data, input_image_shape])
    else:
        # Session path: feed the placeholders and fetch the outputs.
        fetches = [self.boxes, self.scores, self.classes]
        feeds = {
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0,
        }
        out_boxes, out_scores, out_classes = self.sess.run(
            fetches, feed_dict=feeds)

    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    # Font size and outline thickness both scale with the picture size.
    font = ImageFont.truetype(
        font='font/simhei.ttf',
        size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300

    for idx, cls in enumerate(out_classes):
        predicted_class = self.class_names[cls]
        score = out_scores[idx]

        # Grow the box by 5 px per side, round, and clamp to the picture.
        top, left, bottom, right = out_boxes[idx]
        top = max(0, np.floor((top - 5) + 0.5).astype('int32'))
        left = max(0, np.floor((left - 5) + 0.5).astype('int32'))
        bottom = min(image.size[1],
                     np.floor((bottom + 5) + 0.5).astype('int32'))
        right = min(image.size[0],
                    np.floor((right + 5) + 0.5).astype('int32'))

        # Draw the box and its label.
        label_text = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label_text, font)
        # The UTF-8 encoded label is what gets printed.
        label_bytes = label_text.encode('utf-8')
        print(label_bytes)

        # Put the label above the box when it fits, else just inside it.
        fits_above = top - label_size[1] >= 0
        text_origin = np.array(
            [left, top - label_size[1]] if fits_above else [left, top + 1])

        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[cls])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[cls])
        draw.text(text_origin, str(label_bytes, 'UTF-8'),
                  fill=(0, 0, 0), font=font)
        del draw

    finished = timer()
    print(finished - started)
    return image
def detect_image(self, image_id, image):
    """Write the detections for ``image`` to a per-image result file.

    The file ``./input/detection-results/<image_id>.txt`` is created or
    truncated on every call. Each kept detection adds one line of the
    form ``class score left top right bottom``.

    Side effects:
        Sets ``self.confidence`` to 0.01.

    Args:
        image_id: Name (without extension) used for the result file.
        image: Picture to analyse. When ``self.letterbox_image`` is false
            it must provide ``convert`` and ``resize`` as used below.

    Returns:
        ``image`` when the first decoded result is empty, otherwise
        ``None``.
    """
    self.confidence = 0.01

    # The context manager closes the file on every exit path, including
    # the early return for "nothing detected", which previously leaked it.
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])
        model_h, model_w = self.model_image_size[0], self.model_image_size[1]

        # Either pad with grey bars (distortion-free resize) or resize
        # the picture directly.
        if self.letterbox_image:
            crop_img = np.array(letterbox_image(image, (model_w, model_h)))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize((model_w, model_h), Image.BICUBIC)
        photo = np.array(crop_img, dtype=np.float64)

        # Reshape to a single-picture batch and preprocess.
        photo = preprocess_input(
            np.reshape(photo,
                       [1, model_h, model_w, self.model_image_size[2]]))

        preds = self.m2det.predict(photo)

        # Decode the raw predictions.
        results = self.bbox_util.detection_out(
            preds, self.prior, confidence_threshold=self.confidence)

        # Nothing detected: return the picture.
        if len(results[0]) <= 0:
            return image

        # Columns: label, score, xmin, ymin, xmax, ymax.
        dets = results[0]
        det_label = dets[:, 0]
        det_conf = dets[:, 1]
        det_xmin, det_ymin = dets[:, 2], dets[:, 3]
        det_xmax, det_ymax = dets[:, 4], dets[:, 5]

        # Keep the boxes whose score reaches the confidence threshold.
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        if self.letterbox_image:
            # Remove the grey-bar part from the boxes.
            boxes = m2det_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([model_h, model_w]), image_shape)
        else:
            # Scale the coordinates by the original picture size.
            top_xmin = top_xmin * image_shape[1]
            top_ymin = top_ymin * image_shape[0]
            top_xmax = top_xmax * image_shape[1]
            top_ymax = top_ymax * image_shape[0]
            boxes = np.concatenate(
                [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

        for i, c in enumerate(top_label_indices):
            # Labels are shifted by one relative to ``class_names``.
            predicted_class = self.class_names[int(c) - 1]
            # Only the first six characters of the score are written.
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)),
                     str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect_image(self, image, trfn, carn, lrx, rrx, carnums, carlist, sy,
                 slx, srx, lx, ly):
    """Annotate a traffic frame with detections and red-light violations.

    The frame is run through the YOLO session, every detection except class 9
    is drawn, the traffic-light region around ``(lx, ly)`` is classified with
    ``check1`` and, when it is red, cars (class 2) inside the watched region
    are cropped, read with ``cartag`` and logged.

    Args:
        image: PIL image; it is drawn on in place.
        trfn: Running counter of saved traffic-light crops.
        carn: Running counter of saved car crops (red-light pass).
        lrx, rrx: Unused by this function; kept for caller compatibility.
        carnums: Running counter of saved car crops (passing-car pass).
        carlist: List of plate strings seen so far; new plates are appended.
        sy: Vertical threshold; a car whose box centre minus 50 is above it
            is treated as having run the red light.
        slx, srx: Left/right x limits of the watched region.
        lx, ly: Centre of the traffic-light region.

    Returns:
        ``(image, trfn, carn, carnums, len(carlist), run_a_red_light,
        totalcarn)``.
    """
    # NOTE(review): the nesting below was reconstructed from flattened text.
    # In particular, confirm which statements belong under the two
    # plate-validity checks in the red-light branch.

    def padded_box(box):
        """Grow ``box`` by 5 px per side, round, and clip to the image."""
        top, left, bottom, right = box
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        return top, left, bottom, right

    def draw_labelled_box(draw, box, label, colour):
        """Draw the box outline and a filled caption above (or inside) it."""
        top, left, bottom, right = box
        label_size = draw.textsize(label, font)
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])
        draw.rectangle((left, top, right, bottom), outline=colour, width=2)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colour)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)

    run_a_red_light = 0
    totalcarn = 0

    # Resize the frame to the network input size.
    new_image_size = (self._defaults["model_image_size"][0],
                      self._defaults["model_image_size"][1])
    boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

    # Run the detector.
    out_boxes, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes],
        feed_dict={
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.size[1], image.size[0]],
        })
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    font = ImageFont.truetype('font/simhei.ttf', 28)
    light_box = (lx - 15, ly - 30, lx + 15, ly + 30)
    light_caption = (lx - 15, ly - 55, lx + 70, ly - 30)

    for i, c in enumerate(out_classes):
        # The traffic light is evaluated once per frame, on the second
        # detection (i == 1), as in the original code.
        if i == 1:
            trfn = trfn + 1
            light_path = trafficoutputpath + str(trfn) + ".jpg"
            image.crop(light_box).save(light_path)
            temp = check1(light_path)
            draw = ImageDraw.Draw(image)
            if temp == 'red':
                draw.rectangle(light_box, outline="red", width=2)
                draw.rectangle(light_caption, fill="white")
                draw.text((lx - 15, ly - 55), "red", fill=(0, 0, 0),
                          font=font)
                # The three log files used to be left open; the context
                # manager closes them once the pass over the cars is done.
                with open(resultpath, "a", encoding='utf-8') as f1, \
                        open(run_a_red_lightpath(), "a",
                             encoding='utf-8') as f2, \
                        open(Road_ROOTpath() + 'all_illegal_car_info.txt',
                             "a+", encoding='utf-8') as f3:
                    for cn, cf in enumerate(out_classes):
                        if cf != 2:
                            continue
                        predicted_class = self.class_names[cf]
                        top, left, bottom, right = padded_box(out_boxes[cn])
                        label = '{} {:.2f}'.format(predicted_class,
                                                   out_scores[cn])
                        draw = ImageDraw.Draw(image)

                        # Only cars inside the watched region are processed.
                        in_region = (slx < left < srx and bottom < 1080
                                     and top > 500)
                        if not in_region:
                            continue

                        carn = carn + 1
                        car_path = caroutputpath + str(carn) + ".jpg"
                        image.crop((left, top, right, bottom)).save(car_path)
                        ch = cartag(car_path)
                        f1.write(ch + "\n")
                        draw_labelled_box(draw, (top, left, bottom, right),
                                          label, self.colors[cf])
                        draw.rectangle((right, top, right + 150, top + 40),
                                       fill=self.colors[cf])
                        draw.text((right, top, right + 30, top + 60), ch,
                                  fill=(0, 0, 0), font=font)

                        if ((top + bottom) / 2 - 50) < sy:
                            # Strings must be compared by value; the old
                            # ``is not`` identity test was effectively
                            # always true for computed strings.
                            plate_ok = ch != '00000' and ch[1] != '1'
                            if plate_ok:
                                f3.write(ch + " 闯红灯" + "\n")
                            f2.write(ch + " 闯红灯" + "\n")
                            run_a_red_light += 1
                            draw.rectangle(
                                (right, top + 40, right + 100, top + 80),
                                fill='red')
                            draw.text(
                                (right, top + 40, right + 60, top + 140),
                                "闯红灯", fill=(0, 0, 0), font=font)
                            if plate_ok:
                                image.save(run_a_red_light_img_path() + ch +
                                           '_闯红灯' + ".jpg")
            else:
                draw.rectangle(light_box, outline="green", width=2)
                draw.rectangle(light_caption, fill="white")
                draw.text((lx - 15, ly - 55), "green", fill=(0, 0, 0),
                          font=font)

        if c == 9:
            continue

        predicted_class = self.class_names[c]
        top, left, bottom, right = padded_box(out_boxes[i])
        label = '{} {:.2f}'.format(predicted_class, out_scores[i])
        draw = ImageDraw.Draw(image)
        # The original drew this box and caption twice with identical
        # arguments; once is enough.
        draw_labelled_box(draw, (top, left, bottom, right), label,
                          self.colors[c])

        if c == 2 or c == 3 or c == 5 or c == 7:
            totalcarn += 1

        # Cars near the bottom of the frame are counted as passing through.
        if c == 2 and bottom > 950 and left > slx:
            carnums += 1
            plate_path = caridpath + str(carnums) + '.jpg'
            image.crop((left, top, right, bottom)).save(plate_path)
            carid = cartag(plate_path)
            draw = ImageDraw.Draw(image)
            draw.rectangle((right, top, right + 150, top + 40), fill="green")
            draw.text((right, top, right + 30, top + 60), carid,
                      fill=(0, 0, 0), font=font)
            if (carid not in carlist and carid != '00000'
                    and len(carid) == 7):
                carlist.append(carid)

    # Summary overlay: number of distinct plates seen so far.
    draw = ImageDraw.Draw(image)
    draw.rectangle((1300, 150, 1900, 200), fill="white")
    draw.text((1300, 160, 1900, 180),
              "路口通过car的数量为:" + str(len(carlist)),
              fill="black", font=font)

    # Summary overlay: vehicles in this frame and red-light count.
    draw.rectangle((1300, 50, 1900, 140), fill="white")
    if totalcarn >= 10:
        draw.text((1300, 60, 1900, 100),
                  "当前路口机动车数量为" + str(totalcarn) + "大于9" + " 拥堵",
                  fill="black", font=font)
    else:
        draw.text((1300, 60, 1800, 100),
                  "当前路口机动车数量:" + str(totalcarn),
                  fill="black", font=font)
    draw.text((1300, 100, 1800, 140),
              "当前路口闯红灯数量:" + str(run_a_red_light),
              fill="black", font=font)
    del draw
    return image, trfn, carn, carnums, len(
        carlist), run_a_red_light, totalcarn
def detect_image(self, image):
    """Detect objects in ``image`` and draw labelled boxes onto it.

    The image is resized (with grey-bar padding when ``self.letterbox_image``
    is set), passed through ``self.get_pred``, decoded by ``self.bbox_util``
    and every detection whose score reaches ``self.confidence`` is drawn.

    Args:
        image: PIL image; it is drawn on in place.

    Returns:
        The same ``image`` object (untouched when nothing is detected).
    """
    full_shape = np.shape(image)
    img_h, img_w = full_shape[0], full_shape[1]
    image_shape = np.array(full_shape[0:2])
    net_h, net_w = self.input_shape[0], self.input_shape[1]

    # Undistorted resize with grey bars, or a plain bicubic resize.
    if self.letterbox_image:
        resized = np.array(letterbox_image(image, (net_w, net_h)))
    else:
        resized = image.convert('RGB').resize((net_w, net_h), Image.BICUBIC)

    # Batch dimension first, then the input preprocessing.
    batch = np.reshape(np.array(resized, dtype=np.float64),
                       [1, net_h, net_w, 3])
    preds = self.get_pred(preprocess_input(batch)).numpy()

    # Decode; rows are read as (label, confidence, xmin, ymin, xmax, ymax).
    results = self.bbox_util.detection_out(
        preds, confidence_threshold=self.confidence)
    detections = results[0]
    if len(detections) <= 0:
        return image

    # Indices of the detections that reach the confidence threshold.
    keep = np.flatnonzero(detections[:, 1] >= self.confidence)
    top_conf = detections[keep, 1]
    top_label_indices = detections[keep, 0].tolist()
    top_xmin = np.expand_dims(detections[keep, 2], -1)
    top_ymin = np.expand_dims(detections[keep, 3], -1)
    top_xmax = np.expand_dims(detections[keep, 4], -1)
    top_ymax = np.expand_dims(detections[keep, 5], -1)

    if self.letterbox_image:
        # Remove the grey-bar offset.
        boxes = ssd_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                  np.array([net_h, net_w]), image_shape)
    else:
        # Scale by the original image height/width.
        boxes = np.concatenate([
            top_ymin * image_shape[0],
            top_xmin * image_shape[1],
            top_ymax * image_shape[0],
            top_xmax * image_shape[1],
        ], axis=-1)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * img_w + 0.5).astype('int32'))
    thickness = max((img_h + img_w) // net_h, 1)

    for idx, raw_label in enumerate(top_label_indices):
        # Labels are offset by one relative to class_names / colors.
        class_idx = int(raw_label) - 1
        predicted_class = self.class_names[class_idx]
        score = top_conf[idx]

        # Grow the box by 5 px on every side, round and clip to the image.
        top, left, bottom, right = boxes[idx]
        top = max(0, np.floor((top - 5) + 0.5).astype('int32'))
        left = max(0, np.floor((left - 5) + 0.5).astype('int32'))
        bottom = min(img_h, np.floor((bottom + 5) + 0.5).astype('int32'))
        right = min(img_w, np.floor((right + 5) + 0.5).astype('int32'))

        caption = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(caption, font)
        caption_bytes = caption.encode('utf-8')
        print(caption_bytes, top, left, bottom, right)

        # Put the caption above the box unless it would leave the image.
        caption_y = top - label_size[1] if top - label_size[1] >= 0 else top + 1
        text_origin = np.array([left, caption_y])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[class_idx])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[class_idx])
        draw.text(text_origin, str(caption_bytes, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw
    return image
def detect_image(self, image):
    """Run the YOLO session on ``image`` and draw every detection onto it.

    Args:
        image: PIL image; it is drawn on in place.

    Returns:
        The same ``image`` object with boxes and captions drawn.
    """
    target_size = (self.model_image_size[1], self.model_image_size[0])

    # Undistorted resize with grey bars, or a plain bicubic resize.
    if self.letterbox_image:
        boxed_image = letterbox_image(image, target_size)
    else:
        boxed_image = image.convert('RGB').resize(target_size, Image.BICUBIC)

    # Scale to [0, 1] and add the batch dimension.
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)

    img_w, img_h = image.size[0], image.size[1]

    # Feed the network.
    out_boxes, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes],
        feed_dict={
            self.yolo_model.input: image_data,
            self.input_image_shape: [img_h, img_w],
            K.learning_phase(): 0
        })
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    # Font size and outline thickness scale with the image size.
    font = ImageFont.truetype(
        font='font/simhei.ttf',
        size=np.floor(3e-2 * img_h + 0.5).astype('int32'))
    thickness = max((img_w + img_h) // 300, 1)

    for idx, cls in enumerate(out_classes):
        predicted_class = self.class_names[cls]
        score = out_scores[idx]

        # Grow the box by 5 px on every side, round and clip to the image.
        top, left, bottom, right = out_boxes[idx]
        top = max(0, np.floor((top - 5) + 0.5).astype('int32'))
        left = max(0, np.floor((left - 5) + 0.5).astype('int32'))
        bottom = min(img_h, np.floor((bottom + 5) + 0.5).astype('int32'))
        right = min(img_w, np.floor((right + 5) + 0.5).astype('int32'))

        caption = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(caption, font)
        caption_bytes = caption.encode('utf-8')
        print(caption_bytes, top, left, bottom, right)

        # Put the caption above the box unless it would leave the image.
        caption_y = top - label_size[1] if top - label_size[1] >= 0 else top + 1
        text_origin = np.array([left, caption_y])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[cls])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[cls])
        draw.text(text_origin, str(caption_bytes, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw
    return image
def get_FPS(self, image, test_interval):
    """Measure the average time of one YOLO forward pass on ``image``.

    One untimed inference is run first, then ``test_interval`` timed ones.

    Args:
        image: PIL image used as the benchmark input.
        test_interval: Number of timed inferences.

    Returns:
        Average wall-clock seconds per inference.
    """
    target_size = (self.model_image_size[1], self.model_image_size[0])

    # Undistorted resize with grey bars, or a plain bicubic resize.
    if self.letterbox_image:
        boxed_image = letterbox_image(image, target_size)
    else:
        boxed_image = image.convert('RGB').resize(target_size, Image.BICUBIC)

    # Scale to [0, 1] and add the batch dimension.
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)

    def infer_once():
        """Run a single prediction through the eager or session path."""
        if self.eager:
            input_image_shape = np.expand_dims(
                np.array([image.size[1], image.size[0]], dtype='float32'), 0)
            out_boxes, out_scores, out_classes = self.get_pred(
                image_data, input_image_shape)
        else:
            out_boxes, out_scores, out_classes = self.sess.run(
                [self.boxes, self.scores, self.classes],
                feed_dict={
                    self.yolo_model.input: image_data,
                    self.input_image_shape: [image.size[1], image.size[0]],
                    K.learning_phase(): 0
                })
        return out_boxes, out_scores, out_classes

    # Untimed first run, exactly as the original did before starting the clock.
    infer_once()

    started = time.time()
    for _ in range(test_interval):
        infer_once()
    finished = time.time()

    tact_time = (finished - started) / test_interval
    return tact_time
def detect_image(self, image_id, image):
    """Run CenterNet on one image and dump its detections to a text file.

    One line per kept detection is written to
    ``./input/detection-results/<image_id>.txt`` in the form
    ``<class> <score> <left> <top> <right> <bottom>``. The score is the first
    six characters of its string form; coordinates are truncated to ``int``.

    Side effects: ``self.confidence`` is set to 0.01 and
    ``self.nms_threhold`` to 0.5.

    Args:
        image_id: Stem of the results file name.
        image: Input image handed to ``letterbox_image``.

    Returns:
        ``image`` unchanged when nothing is detected, otherwise ``None``.
    """
    self.confidence = 0.01
    self.nms_threhold = 0.5

    # Opening in "w" mode creates/truncates the results file even when nothing
    # is detected. The context manager closes it on every exit path,
    # including the early return below (the handle used to leak there).
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])

        # Undistorted resize with grey bars.
        crop_img = letterbox_image(image,
                                   [self.image_size[0], self.image_size[1]])

        # Reverse the channel axis (RGB -> BGR): the original
        # centernet_hourglass weights were trained on BGR images.
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]

        # Normalise, move channels first and add the batch dimension.
        photo = np.reshape(
            np.transpose(preprocess_image(photo), (2, 0, 1)),
            [1, self.image_size[2], self.image_size[0], self.image_size[1]])

        with torch.no_grad():
            images = Variable(
                torch.from_numpy(np.asarray(photo)).type(torch.FloatTensor))
            if self.cuda:
                images = images.cuda()

            outputs = self.centernet(images)
            if self.backbone == 'hourglass':
                outputs = [
                    outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"],
                    outputs[-1]["reg"]
                ]

            # Decode heat-map / size / offset outputs into boxes.
            outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                                  self.image_size, self.confidence, self.cuda)

        # Max-pooling based suppression cannot remove partial boxes on large
        # objects, so an extra box-level NMS is applied when enabled. It is
        # best-effort: on failure the un-suppressed boxes are kept. Only
        # Exception is caught so Ctrl-C / SystemExit still propagate.
        try:
            if self.nms:
                outputs = np.array(nms(outputs, self.nms_threhold))
        except Exception:
            pass

        output = outputs[0]
        if len(output) <= 0:
            return image

        # Columns are read as (xmin, ymin, xmax, ymax, confidence, label).
        batch_boxes, det_conf, det_label = (output[:, :4], output[:, 4],
                                            output[:, 5])
        det_xmin, det_ymin = batch_boxes[:, 0], batch_boxes[:, 1]
        det_xmax, det_ymax = batch_boxes[:, 2], batch_boxes[:, 3]

        # Keep only the boxes whose score reaches the confidence threshold.
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        # Undo the grey-bar padding.
        boxes = centernet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.image_size[0], self.image_size[1]]), image_shape)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)),
                     str(int(top)), str(int(right)), str(int(bottom))))
    return
import numpy as np
import tensorflow as tf
from PIL import Image, ImageFont, ImageDraw
from utils.utils import letterbox_image
from utils.setup_tool import get_classes, get_anchors
import cv2

# Script: prepare one test image and load a TFLite YOLO model.
img = "test_data/london.jpg"
image = Image.open(img)
model_image_size = (416, 416)
image_shape = (image.size[1], image.size[0], 3)

# The original lines here were bare ``expr, 'message'`` tuples (the ``assert``
# keyword was missing), so the check never ran. Validate explicitly instead.
if model_image_size[0] % 32 != 0 or model_image_size[1] % 32 != 0:
    raise ValueError('Multiples of 32 required')

# Letterbox to the model size, scale to [0, 1] and add the batch dimension.
boxed_image = letterbox_image(image, tuple(reversed(model_image_size)))
image_data = np.array(boxed_image, dtype='float32')
image_data /= 255.
image_data = np.expand_dims(image_data, 0)
print(image_shape)
print(image_data.shape)

# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(
    model_path="model_data/small_mobilenet_yolo.tflite")
interpreter.allocate_tensors()

# Get input and output tensors.
def detect_image(self, image, classroom_id):
    """Detect objects in ``image`` and draw only the ``person`` detections.

    The elapsed time of the whole call is printed before returning.

    Args:
        image: PIL image; it is drawn on in place.
        classroom_id: Not used by the current implementation (the seat
            lookup that consumed it was disabled); kept so existing callers
            keep working.

    Returns:
        The same ``image`` object with person boxes and captions drawn.
    """
    start = timer()

    # Shrink each side to the nearest multiple of 32 for the network input.
    new_image_size = (image.width - (image.width % 32),
                      image.height - (image.height % 32))
    boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

    # Run the detector.
    out_boxes, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes],
        feed_dict={
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    font = ImageFont.truetype(font='font/simhei.ttf',
                              size=np.floor(3e-2 * image.size[1] +
                                            0.5).astype('int32'))
    # At least one pixel: without the floor of 1, images whose width + height
    # is below 300 got a thickness of 0 and no outline was drawn at all.
    thickness = max((image.size[0] + image.size[1]) // 300, 1)

    for i, c in enumerate(out_classes):
        predicted_class = self.class_names[c]
        if predicted_class != 'person':
            continue
        score = out_scores[i]

        # Grow the box by 5 px on every side, round and clip to the image.
        top, left, bottom, right = out_boxes[i]
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label)

        # Put the caption above the box unless it would leave the image.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[c])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw

    end = timer()
    print(end - start)
    return image
def detect_image(self, image):
    """Run the YOLOv3 network on ``image`` and draw the detections onto it.

    Args:
        image: PIL image; it is drawn on in place.

    Returns:
        The same ``image`` object (untouched when nothing is detected).
    """
    image_shape = np.array(np.shape(image)[0:2])

    # Undistorted resize with grey bars, or a plain bicubic resize.
    if self.letterbox_image:
        crop_img = np.array(
            letterbox_image(
                image,
                (self.model_image_size[1], self.model_image_size[0])))
    else:
        crop_img = image.convert('RGB')
        crop_img = crop_img.resize(
            (self.model_image_size[1], self.model_image_size[0]),
            Image.BICUBIC)

    # Scale to [0, 1] and move channels first.
    photo = np.array(crop_img, dtype=np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))

    # Wrapping in a list adds the batch dimension.
    images = [photo]

    with torch.no_grad():
        images = torch.from_numpy(np.asarray(images))
        if self.cuda:
            images = images.cuda()

        # Forward pass, then decode each of the three output heads.
        outputs = self.net(images)
        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))

        # Stack the decoded predictions and run non-max suppression.
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(
            output, self.config["yolo"]["classes"],
            conf_thres=self.confidence, nms_thres=self.iou)

    # Presumably the first entry is not a tensor when nothing survives NMS,
    # which makes the conversion fail; the input image is returned then.
    # Only Exception is caught so Ctrl-C / SystemExit still propagate.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except Exception:
        return image

    # Score = column 4 * column 5; keep rows above the confidence threshold.
    top_index = (batch_detections[:, 4] *
                 batch_detections[:, 5]) > self.confidence
    top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
    top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
    top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
    top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

    if self.letterbox_image:
        # Boxes are relative to the padded image; remove the grey bars.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)
    else:
        # Rescale from network-input pixels to original-image pixels.
        top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
        top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
        top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
        top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
        boxes = np.concatenate([top_ymin, top_xmin, top_ymax, top_xmax],
                               axis=-1)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
    thickness = max((np.shape(image)[0] + np.shape(image)[1]) //
                    self.model_image_size[0], 1)

    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]

        # Grow the box by 5 px on every side, round and clip to the image.
        top, left, bottom, right = boxes[i]
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0],
                     np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1],
                    np.floor(right + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label, top, left, bottom, right)

        # Put the caption above the box unless it would leave the image.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        colour = self.colors[self.class_names.index(predicted_class)]
        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=colour)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colour)
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw
    return image
def detect_image(self, image):
    """Detect objects in ``image``, draw them and print the elapsed time.

    Args:
        image: PIL image; it is drawn on in place.

    Returns:
        The same ``image`` object with boxes and captions drawn.

    Raises:
        AssertionError: If ``self.model_image_size`` is set and either side
            is not a multiple of 32.
    """
    start = timer()

    if self.model_image_size != (None, None):
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
        # Pad the image to the fixed network input size.
        boxed_image = letterbox_image(
            image, tuple(reversed(self.model_image_size)))
    else:
        # No fixed size: shrink each side to the nearest multiple of 32.
        new_image_size = (image.width - (image.width % 32),
                          image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)

    # Scale to [0, 1] and add the batch dimension
    # (e.g. (416, 416, 3) becomes (1, 416, 416, 3)).
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)

    # Run the detector. For n detections: out_boxes has shape (n, 4),
    # out_scores and out_classes have shape (n,).
    out_boxes, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes],
        feed_dict={
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    font = ImageFont.truetype(font='font/simhei.ttf',
                              size=np.floor(3e-2 * image.size[1] +
                                            0.5).astype('int32'))
    # At least one pixel: without the floor of 1, images whose width + height
    # is below 300 got a thickness of 0 and no outline was drawn at all.
    thickness = max((image.size[0] + image.size[1]) // 300, 1)

    # i is the index of the detection, c its class index.
    for i, c in enumerate(out_classes):
        predicted_class = self.class_names[c]
        score = out_scores[i]

        # Grow the box by 5 px on every side, round and clip to the image.
        top, left, bottom, right = out_boxes[i]
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label)

        # Put the caption above the box unless it would leave the image.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[c])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw

    end = timer()
    # Report how long the detection took.
    print("检测执行时间:" + str(end - start))
    return image
def detect_image(self, image_id, image):
    """Run the YOLOv3 network on one image and dump detections to a file.

    One line per kept detection is written to
    ``./input/detection-results/<image_id>.txt`` in the form
    ``<class> <score> <left> <top> <right> <bottom>``. The score is the first
    six characters of its string form; coordinates are truncated to ``int``.

    Side effect: ``self.confidence`` is overwritten with 0.001.

    Args:
        image_id: Stem of the results file name.
        image: Input image handed to ``letterbox_image``.

    Returns:
        ``image`` unchanged when nothing is detected, otherwise ``None``.
    """
    self.confidence = 0.001

    # Opening in "w" mode creates/truncates the results file even when nothing
    # is detected. The context manager closes it on every exit path,
    # including the early return below (the handle used to leak there).
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])

        # Undistorted resize with grey bars.
        crop_img = np.array(
            letterbox_image(
                image,
                (self.model_image_size[0], self.model_image_size[1])))

        # Scale to [0, 1], move channels first, add the batch dimension.
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            # Forward pass, then decode each of the three output heads.
            outputs = self.net(images)
            output_list = []
            for i in range(3):
                output_list.append(self.yolo_decodes[i](outputs[i]))
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=0.3)

        # Presumably the first entry is not a tensor when nothing survives
        # NMS, which makes the conversion fail; the input image is returned
        # then. Only Exception is caught so Ctrl-C / SystemExit propagate.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            return image

        # Score = column 4 * column 5; keep rows above the threshold.
        top_index = (batch_detections[:, 4] *
                     batch_detections[:, 5]) > self.confidence
        top_conf = (batch_detections[top_index, 4] *
                    batch_detections[top_index, 5])
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Undo the grey-bar padding.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)),
                     str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect_image(self, image):
    """Run CenterNet (Keras) on ``image`` and draw the detections onto it.

    Args:
        image: PIL image; it is drawn on in place.

    Returns:
        The same ``image`` object (untouched when nothing is detected).
    """
    full_shape = np.shape(image)
    img_h, img_w = full_shape[0], full_shape[1]
    image_shape = np.array(full_shape[0:2])
    net_h, net_w = self.input_shape[0], self.input_shape[1]

    # Undistorted resize with grey bars.
    crop_img = letterbox_image(image, [net_h, net_w])

    # Reverse the channel axis (RGB -> BGR): the original
    # centernet_hourglass weights were trained on BGR images.
    bgr = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    photo = np.reshape(preprocess_image(bgr),
                       [1, net_h, net_w, self.input_shape[2]])
    preds = self.centernet.predict(photo)

    # Max-pooling based suppression cannot remove partial boxes on large
    # objects, so an extra box-level NMS is applied when enabled.
    if self.nms:
        preds = np.array(nms(preds, self.nms_threhold))

    if len(preds[0]) <= 0:
        return image

    # Divide the box coordinates by a quarter of the input size, writing the
    # result back into preds[0] as the original did.
    detections = preds[0]
    detections[:, 0:4] = detections[:, 0:4] / (net_h / 4)

    # Last column is the label, second to last the confidence.
    det_conf = detections[:, -2]
    keep = np.flatnonzero(det_conf >= self.confidence)
    top_conf = det_conf[keep]
    top_label_indices = detections[keep, -1].tolist()
    top_xmin = np.expand_dims(detections[keep, 0], -1)
    top_ymin = np.expand_dims(detections[keep, 1], -1)
    top_xmax = np.expand_dims(detections[keep, 2], -1)
    top_ymax = np.expand_dims(detections[keep, 3], -1)

    # Undo the grey-bar padding.
    boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                    np.array([net_h, net_w]), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * img_w + 0.5).astype('int32'))
    thickness = max((img_h + img_w) // net_h, 1)

    for idx, raw_label in enumerate(top_label_indices):
        class_idx = int(raw_label)
        predicted_class = self.class_names[class_idx]
        score = top_conf[idx]

        # Grow the box by 5 px on every side, round and clip to the image.
        top, left, bottom, right = boxes[idx]
        top = max(0, np.floor((top - 5) + 0.5).astype('int32'))
        left = max(0, np.floor((left - 5) + 0.5).astype('int32'))
        bottom = min(img_h, np.floor((bottom + 5) + 0.5).astype('int32'))
        right = min(img_w, np.floor((right + 5) + 0.5).astype('int32'))

        caption = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(caption, font)
        caption_bytes = caption.encode('utf-8')
        print(caption_bytes, top, left, bottom, right)

        # Put the caption above the box unless it would leave the image.
        caption_y = top - label_size[1] if top - label_size[1] >= 0 else top + 1
        text_origin = np.array([left, caption_y])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[class_idx])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[class_idx])
        draw.text(text_origin, str(caption_bytes, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw
    return image
def get_FPS(self, image, test_interval):
    """Return the mean time in seconds of one inference + decoding pass.

    The image is preprocessed once. One untimed warm-up pass is run, then
    ``test_interval`` timed passes; each pass covers ``self.get_pred``,
    ``self.bbox_util.detection_out``, confidence filtering and gray-bar
    removal. Timing uses ``time.perf_counter`` (monotonic, high
    resolution) so clock adjustments cannot skew the benchmark.

    Args:
        image: Input image; only its first two shape entries
            (height, width) are used besides the pixel data.
        test_interval: Number of timed passes.

    Returns:
        Elapsed seconds divided by ``test_interval``.

    Raises:
        ZeroDivisionError: If ``test_interval`` is 0.
    """
    image_shape = np.array(np.shape(image)[0:2])
    model_h, model_w = self.model_image_size[0], self.model_image_size[1]

    # Resize the picture to the network input size.
    crop_img = letterbox_image(image, [model_h, model_w])
    photo = np.array(crop_img, dtype=np.float32)
    # Preprocess / normalise and add the batch axis.
    photo = np.reshape(preprocess_input(photo),
                       [1, model_h, model_w, self.model_image_size[2]])

    def _run_once():
        """Run one full predict + post-process pass; results are discarded."""
        preds = self.get_pred(photo)
        preds = [pred.numpy() for pred in preds]
        # Decode the raw predictions.
        results = self.bbox_util.detection_out(
            preds, self.prior, confidence_threshold=self.confidence)
        if len(results[0]) <= 0:
            return
        results = np.array(results)

        # Keep the boxes scoring at least `confidence`. The Python-level
        # filtering and the unused intermediates are kept on purpose so
        # the timed work stays the same as before.
        det_label = results[0][:, 5]
        det_conf = results[0][:, 4]
        det_xmin, det_ymin = results[0][:, 0], results[0][:, 1]
        det_xmax, det_ymax = results[0][:, 2], results[0][:, 3]
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        # Remove the gray-bar padding.
        boxes = efficientdet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([model_h, model_w]), image_shape)

    # Untimed warm-up pass.
    _run_once()

    t1 = time.perf_counter()
    for _ in range(test_interval):
        _run_once()
    t2 = time.perf_counter()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def get_FPS(self, image, test_interval):
    """Return the mean time in seconds of one SSD inference + decoding pass.

    The image is preprocessed once (letterboxed when
    ``self.letterbox_image`` is truthy, otherwise converted to RGB and
    resized with bicubic interpolation). One untimed warm-up pass is run,
    then ``test_interval`` timed passes. Timing uses
    ``time.perf_counter`` (monotonic, high resolution) so clock
    adjustments cannot skew the benchmark.

    Args:
        image: Input image. When letterboxing is disabled it must offer
            ``convert`` and ``resize`` (the code calls both).
        test_interval: Number of timed passes.

    Returns:
        Elapsed seconds divided by ``test_interval``.

    Raises:
        ZeroDivisionError: If ``test_interval`` is 0.
    """
    image_shape = np.array(np.shape(image)[0:2])
    net_h, net_w = self.input_shape[0], self.input_shape[1]

    # Pad with gray bars for a distortion-free resize, or resize directly.
    if self.letterbox_image:
        crop_img = np.array(letterbox_image(image, (net_w, net_h)))
    else:
        crop_img = image.convert('RGB')
        crop_img = crop_img.resize((net_w, net_h), Image.BICUBIC)
    photo = np.array(crop_img, dtype=np.float64)
    photo = preprocess_input(np.reshape(photo, [1, net_h, net_w, 3]))

    def _run_once():
        """Run one full predict + post-process pass; results are discarded."""
        preds = self.ssd_model.predict(photo)
        results = self.bbox_util.detection_out(
            preds, confidence_threshold=self.confidence)
        if len(results[0]) <= 0:
            return

        # The Python-level filtering and the unused intermediates are kept
        # on purpose so the timed work stays the same as before.
        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin, det_ymin = results[0][:, 2], results[0][:, 3]
        det_xmax, det_ymax = results[0][:, 4], results[0][:, 5]
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        # Remove the gray-bar padding, or scale by the image size when the
        # image was resized directly.
        if self.letterbox_image:
            boxes = ssd_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([net_h, net_w]), image_shape)
        else:
            top_xmin = top_xmin * image_shape[1]
            top_ymin = top_ymin * image_shape[0]
            top_xmax = top_xmax * image_shape[1]
            top_ymax = top_ymax * image_shape[0]
            boxes = np.concatenate(
                [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

    # Untimed warm-up pass.
    _run_once()

    t1 = time.perf_counter()
    for _ in range(test_interval):
        _run_once()
    t2 = time.perf_counter()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def get_FPS(self, image, test_interval):
    """Return the mean time in seconds of one RetinaFace inference pass.

    The image is converted to a float32 array and, when
    ``self.letterbox_image`` is truthy, letterboxed to
    ``self.input_shape``; otherwise ``self.anchors`` is rebuilt for the
    image's own size (a side effect on the instance). One untimed warm-up
    pass is run, then ``test_interval`` timed passes. Timing uses
    ``time.perf_counter`` (monotonic, high resolution) so clock
    adjustments cannot skew the benchmark.

    Args:
        image: Input image convertible to an (H, W, C) array.
        test_interval: Number of timed passes.

    Returns:
        Elapsed seconds divided by ``test_interval``.

    Raises:
        ZeroDivisionError: If ``test_interval`` is 0.
    """
    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    # Multipliers applied to result columns [:4] (box) and [5:]
    # (landmarks), laid out as repeating (width, height) pairs.
    scale = [im_width, im_height, im_width, im_height]
    scale_for_landmarks = [im_width, im_height] * 5

    # letterbox_image pads with gray bars for a distortion-free resize.
    if self.letterbox_image:
        image = letterbox_image(
            image, [self.input_shape[1], self.input_shape[0]])
    else:
        self.anchors = Anchors(
            self.cfg, image_size=(im_height, im_width)).get_anchors()

    photo = np.expand_dims(preprocess_input(image), 0)

    def _run_once():
        """Run one full predict + post-process pass; results are discarded."""
        preds = self.retinaface.predict(photo)
        results = self.bbox_util.detection_out(
            preds, self.anchors, confidence_threshold=self.confidence)
        if len(results) <= 0:
            return
        results = np.array(results)

        # When letterboxing was used, remove the gray-bar padding.
        if self.letterbox_image:
            results = retinaface_correct_boxes(
                results,
                np.array([self.input_shape[0], self.input_shape[1]]),
                np.array([im_height, im_width]))

        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

    # Untimed warm-up pass.
    _run_once()

    t1 = time.perf_counter()
    for _ in range(test_interval):
        _run_once()
    t2 = time.perf_counter()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def get_bbox(self, image):
    """Run the YOLO network on ``image`` and return its boxes as plain lists.

    The image is letterboxed to ``self.model_image_size``, scaled to
    [0, 1], moved to channel-first layout and passed through ``self.net``.
    The three outputs are decoded with ``self.yolo_decodes``, concatenated
    and filtered by ``non_max_suppression``. Remaining boxes are mapped
    back to the original image, grown by 5 px per side, rounded and
    clamped to the image.

    Args:
        image: Input image; ``np.shape(image)[0:2]`` is taken as
            (height, width).

    Returns:
        A list of ``[left, top, right, bottom, score, class_index]``
        entries, or ``[None]`` when the detections of the first batch
        element cannot be converted to an array (presumably because
        nothing was detected; verify against ``non_max_suppression``).
    """
    img_h, img_w = np.shape(image)[0], np.shape(image)[1]
    image_shape = np.array(np.shape(image)[0:2])
    model_h, model_w = self.model_image_size[0], self.model_image_size[1]

    crop_img = np.array(letterbox_image(image, (model_h, model_w)))
    photo = np.array(crop_img, dtype=np.float32)
    photo /= 255.0
    # HWC -> CHW.
    photo = np.transpose(photo, (2, 0, 1))
    photo = photo.astype(np.float32)

    # Wrapping in a list adds the batch axis.
    images = torch.from_numpy(np.asarray([photo]))
    if self.cuda:
        images = images.cuda()

    with torch.no_grad():
        outputs = self.net(images)
        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(
            output, self.config["yolo"]["classes"],
            conf_thres=self.confidence, nms_thres=0.3)

    # Best-effort conversion, as before, but no longer a bare `except:`
    # so KeyboardInterrupt / SystemExit are not swallowed.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except Exception:
        return [None]

    # Columns 4 and 5 are multiplied together to form the final score.
    scores = batch_detections[:, 4] * batch_detections[:, 5]
    top_index = scores > self.confidence
    top_conf = scores[top_index]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
    top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
    top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
    top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

    # Remove the gray-bar padding.
    boxes = yolo_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([model_h, model_w]), image_shape)

    bboxes = []
    for i, c in enumerate(top_label):
        top, left, bottom, right = boxes[i]
        # Grow by 5 px per side, round to nearest, clamp to the image.
        top = max(0, np.floor((top - 5) + 0.5).astype('int32'))
        left = max(0, np.floor((left - 5) + 0.5).astype('int32'))
        bottom = min(img_h, np.floor((bottom + 5) + 0.5).astype('int32'))
        right = min(img_w, np.floor((right + 5) + 0.5).astype('int32'))
        bboxes.append([left, top, right, bottom, top_conf[i], c])
    return bboxes
def detect_image(self, image_id, image):
    """Write the detections for one image to a detection-results text file.

    Sets ``self.confidence`` to 0.01 and ``self.iou`` to 0.5 (both persist
    on the instance), runs the network on the letterboxed image and writes
    one line per detection to
    ``./input/detection-results/<image_id>.txt`` in the form
    ``<class> <score> <left> <top> <right> <bottom>``.

    The file is always created (and truncated), even when nothing is
    detected. It is managed by a ``with`` block so the handle is closed on
    the early return and on any exception; previously it leaked on those
    paths.

    Args:
        image_id: String used as the output file stem.
        image: Input image; ``np.shape(image)[0:2]`` is taken as
            (height, width).

    Returns:
        None.
    """
    self.confidence = 0.01
    self.iou = 0.5
    net_size = image_sizes[self.phi]

    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])

        # Pad with gray bars for a distortion-free resize.
        crop_img = np.array(letterbox_image(image, (net_size, net_size)))
        photo = np.array(crop_img, dtype=np.float32)
        # Preprocess, then HWC -> CHW.
        photo = np.transpose(preprocess_input(photo), (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            # Forward pass.
            _, regression, classification, anchors = self.net(images)

            # Decode the predictions and suppress overlapping boxes.
            regression = decodebox(regression, anchors, images)
            detection = torch.cat([regression, classification], axis=-1)
            batch_detections = non_max_suppression(
                detection, len(self.class_names),
                conf_thres=self.confidence, nms_thres=self.iou)

            # If the detections cannot be converted (presumably nothing
            # was detected), stop and leave the file empty. Narrowed from
            # a bare `except:` so KeyboardInterrupt / SystemExit are not
            # swallowed.
            try:
                batch_detections = batch_detections[0].cpu().numpy()
            except Exception:
                return

        # Keep the boxes scoring above `confidence`.
        top_index = batch_detections[:, 4] > self.confidence
        top_conf = batch_detections[top_index, 4]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Remove the gray-bar padding.
        boxes = efficientdet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([net_size, net_size]), image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            # The score is truncated to its first six characters.
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)),
                     str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect_image(self, image_id, image, results):
    """Append this image's CenterNet detections to ``results``.

    Sets ``self.confidence`` to 0.01 and ``self.nms_threhold`` to 0.5
    (both persist on the instance), runs the network on the letterboxed
    image and appends one dict per detection with the keys ``image_id``,
    ``category_id``, ``bbox`` (``[left, top, width, height]`` as floats)
    and ``score``.

    Args:
        image_id: Identifier of the image; converted with ``int``.
        image: Input image exposing ``size`` as (width, height).
        results: List that receives the detection dicts (mutated in place).

    Returns:
        The same ``results`` list.
    """
    self.confidence = 0.01
    self.nms_threhold = 0.5

    image_shape = np.array(np.shape(image)[0:2])
    net_h, net_w = self.input_shape[0], self.input_shape[1]

    crop_img = letterbox_image(image, [net_h, net_w])
    # Reverse the channel axis (RGB -> BGR): the original
    # centernet_hourglass weights were trained on BGR images.
    photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    # Preprocess / normalise and add the batch axis.
    photo = np.reshape(preprocess_image(photo),
                       [1, net_h, net_w, self.input_shape[2]])

    preds = self.centernet.predict(photo)
    if self.nms:
        preds = np.array(nms(preds, self.nms_threhold))

    if len(preds[0]) <= 0:
        return results

    preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)
    detections = preds[0]

    # Keep only the boxes scoring at least `confidence`.
    det_label = detections[:, -1]
    det_conf = detections[:, -2]
    keep = det_conf >= self.confidence
    top_conf = det_conf[keep]
    top_label_indices = det_label[keep].tolist()
    top_xmin = detections[:, 0][keep][:, None]
    top_ymin = detections[:, 1][keep][:, None]
    top_xmax = detections[:, 2][keep][:, None]
    top_ymax = detections[:, 3][keep][:, None]

    # Remove the gray-bar padding.
    boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                    np.array([net_h, net_w]), image_shape)

    for idx, cls in enumerate(top_label_indices):
        predicted_class = self.class_names[int(cls)]

        # Round to the nearest integer and clamp to the image.
        top, left, bottom, right = boxes[idx]
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

        results.append({
            "image_id": int(image_id),
            "category_id": clsid2catid[cls],
            "bbox": [
                float(left),
                float(top),
                float(right - left),
                float(bottom - top),
            ],
            "score": float(top_conf[idx]),
        })
    return results
def detect_image(self, image):
    """Detect objects in ``image`` with M2Det and draw them onto it.

    The image is letterboxed to ``self.model_image_size``, preprocessed
    and fed to ``self.m2det``. Decoded detections scoring at least
    ``self.confidence`` are drawn (rectangle plus a "<class> <score>"
    label) directly on ``image``. Class labels from the decoder are
    shifted by one (``c - 1``) before indexing ``self.class_names`` and
    ``self.colors``.

    The outline thickness is clamped to at least 1. Previously it could
    floor-divide to 0 on small images, in which case no rectangle was
    drawn at all.

    Args:
        image: Image to annotate; it is drawn on in place through
            ``ImageDraw.Draw``.

    Returns:
        The same ``image`` object. It is returned unmodified when no
        detections are decoded.
    """
    img_h, img_w = np.shape(image)[0], np.shape(image)[1]
    image_shape = np.array(np.shape(image)[0:2])
    model_h, model_w = self.model_image_size[0], self.model_image_size[1]

    # This letterbox_image variant also returns the padding offsets,
    # which are not needed here.
    crop_img, x_offset, y_offset = letterbox_image(image, [model_h, model_w])
    photo = np.array(crop_img, dtype=np.float64)
    # Add the batch axis, then preprocess / normalise.
    photo = preprocess_input(
        np.reshape(photo, [1, model_h, model_w, self.model_image_size[2]]))

    preds = self.m2det.predict(photo)
    # Decode the raw predictions.
    results = self.bbox_util.detection_out(
        preds, self.prior, confidence_threshold=self.confidence)

    if len(results[0]) <= 0:
        return image

    # Keep only the boxes scoring at least `confidence`.
    det_label = results[0][:, 0]
    det_conf = results[0][:, 1]
    det_xmin, det_ymin = results[0][:, 2], results[0][:, 3]
    det_xmax, det_ymax = results[0][:, 4], results[0][:, 5]
    top_indices = [
        i for i, conf in enumerate(det_conf) if conf >= self.confidence
    ]
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin = np.expand_dims(det_xmin[top_indices], -1)
    top_ymin = np.expand_dims(det_ymin[top_indices], -1)
    top_xmax = np.expand_dims(det_xmax[top_indices], -1)
    top_ymax = np.expand_dims(det_ymax[top_indices], -1)

    # Remove the gray-bar padding.
    boxes = m2det_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([model_h, model_w]), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * img_w + 0.5).astype('int32'))
    # At least one pixel, so boxes are still drawn on small images.
    thickness = max((img_h + img_w) // self.model_image_size[0], 1)

    for i, c in enumerate(top_label_indices):
        class_index = int(c - 1)
        predicted_class = self.class_names[class_index]
        score = top_conf[i]

        # Grow by 5 px per side, round to nearest, clamp to the image.
        top, left, bottom, right = boxes[i]
        top = max(0, np.floor((top - 5) + 0.5).astype('int32'))
        left = max(0, np.floor((left - 5) + 0.5).astype('int32'))
        bottom = min(img_h, np.floor((bottom + 5) + 0.5).astype('int32'))
        right = min(img_w, np.floor((right + 5) + 0.5).astype('int32'))

        # Draw the box and its label.
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label)

        # Put the label above the box when it fits, otherwise just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        for offset in range(thickness):
            draw.rectangle([left + offset, top + offset,
                            right - offset, bottom - offset],
                           outline=self.colors[class_index])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[class_index])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw

    return image
def detect_image(sess, image):
    """Run YOLO through a TensorFlow session and draw the boxes on ``image``.

    The image is letterboxed to the module-level ``model_image_size``,
    scaled to [0, 1] and passed to ``tf_out``; ``yolo_out`` turns the raw
    outputs into boxes, classes and scores. Each box is unpacked as
    ``(x, y, w, h)`` and drawn with a "<class> <score>" label. Progress and
    the elapsed time are printed.

    The "Multiples of 32 required" checks used to be bare tuple
    expressions that never ran; they are now enforced. The outline
    thickness is clamped to at least 1 so boxes are still drawn on small
    images.

    Args:
        sess: Session object forwarded to ``tf_out``.
        image: Image exposing ``size`` as (width, height); it is drawn on
            in place.

    Returns:
        The same ``image`` object, annotated when boxes were found.

    Raises:
        ValueError: If either entry of ``model_image_size`` is not a
            multiple of 32.
    """
    start = timer()
    image_shape = (image.size[1], image.size[0], 3)

    if model_image_size[0] % 32 != 0 or model_image_size[1] % 32 != 0:
        raise ValueError('Multiples of 32 required')

    boxed_image = letterbox_image(image, tuple(reversed(model_image_size)))
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    # Add the batch axis.
    image_data = np.expand_dims(image_data, 0)

    outs = tf_out(sess, image_data)
    out_boxes, out_classes, out_scores = yolo_out(outs, image_shape)
    print(model_image_size)

    if out_boxes is not None:
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        font = ImageFont.truetype(
            font='font/FiraMono-Medium.otf',
            size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        # At least one pixel, so boxes are still drawn on small images.
        thickness = max((image.size[0] + image.size[1]) // 300, 1)

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            # Round to the nearest integer and clamp to the image.
            x, y, w, h = box
            top = max(0, np.floor(y + 0.5).astype('int32'))
            left = max(0, np.floor(x + 0.5).astype('int32'))
            bottom = min(image.size[1],
                         np.floor(y + h + 0.5).astype('int32'))
            right = min(image.size[0],
                        np.floor(x + w + 0.5).astype('int32'))
            print(label, (left, top), (right, bottom),
                  ((right - left), (bottom - top)))

            # Put the label above the box when it fits, otherwise inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Thick outlines are built from nested 1 px rectangles.
            for offset in range(thickness):
                draw.rectangle([left + offset, top + offset,
                                right - offset, bottom - offset],
                               outline=colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
    else:
        print('No Boxes')

    end = timer()
    print(end - start)
    return image