def get_map_txt(self, image_id, image, class_names, map_out_path): f = open( os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w") image_shape = np.array(np.shape(image)[0:2]) #---------------------------------------------------------# # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB #---------------------------------------------------------# image = cvtColor(image) #---------------------------------------------------------# # 给图像增加灰条,实现不失真的resize # 也可以直接resize进行识别 #---------------------------------------------------------# image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image) #---------------------------------------------------------# # 添加上batch_size维度 #---------------------------------------------------------# image_data = np.expand_dims( np.transpose( preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) with torch.no_grad(): images = torch.from_numpy(image_data) if self.cuda: images = images.cuda() #---------------------------------------------------------# # 将图像输入网络当中进行预测! #---------------------------------------------------------# outputs = self.net(images) outputs = self.bbox_util.decode_box(outputs) #---------------------------------------------------------# # 将预测框进行堆叠,然后进行非极大抑制 #---------------------------------------------------------# results = self.bbox_util.non_max_suppression( torch.cat(outputs, 1), self.num_classes, self.input_shape, image_shape, self.letterbox_image, conf_thres=self.confidence, nms_thres=self.nms_iou) if results[0] is None: return top_label = np.array(results[0][:, 6], dtype='int32') top_conf = results[0][:, 4] * results[0][:, 5] top_boxes = results[0][:, :4] for i, c in list(enumerate(top_label)): predicted_class = self.class_names[int(c)] box = top_boxes[i] score = str(top_conf[i]) top, left, bottom, right = box if predicted_class not in class_names: continue f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str( int(top)), str(int(right)), str(int(bottom)))) f.close() return
def get_FPS(self, image, test_interval): image_shape = np.array(np.shape(image)[0:2]) #---------------------------------------------------------# # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB #---------------------------------------------------------# image = cvtColor(image) #---------------------------------------------------------# # 给图像增加灰条,实现不失真的resize # 也可以直接resize进行识别 #---------------------------------------------------------# image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image) #---------------------------------------------------------# # 添加上batch_size维度 #---------------------------------------------------------# image_data = np.expand_dims( np.transpose( preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) with torch.no_grad(): images = torch.from_numpy(image_data) if self.cuda: images = images.cuda() #---------------------------------------------------------# # 将图像输入网络当中进行预测! #---------------------------------------------------------# outputs = self.net(images) outputs = self.bbox_util.decode_box(outputs) #---------------------------------------------------------# # 将预测框进行堆叠,然后进行非极大抑制 #---------------------------------------------------------# results = self.bbox_util.non_max_suppression( torch.cat(outputs, 1), self.num_classes, self.input_shape, image_shape, self.letterbox_image, conf_thres=self.confidence, nms_thres=self.nms_iou) t1 = time.time() for _ in range(test_interval): with torch.no_grad(): #---------------------------------------------------------# # 将图像输入网络当中进行预测! #---------------------------------------------------------# outputs = self.net(images) outputs = self.bbox_util.decode_box(outputs) #---------------------------------------------------------# # 将预测框进行堆叠,然后进行非极大抑制 #---------------------------------------------------------# results = self.bbox_util.non_max_suppression( torch.cat(outputs, 1), self.num_classes, self.input_shape, image_shape, self.letterbox_image, conf_thres=self.confidence, nms_thres=self.nms_iou) t2 = time.time() tact_time = (t2 - t1) / test_interval return tact_time
def __getitem__(self, index): for i, global_index in enumerate( range(index * self.batch_size, (index + 1) * self.batch_size)): global_index = global_index % self.length image, boxes, mask_gt, num_crowds, image_id = self.pull_item( global_index) #------------------------------# # 获得种类 #------------------------------# class_ids = boxes[:, -1] #------------------------------# # 获得框的坐标 #------------------------------# boxes = boxes[:, :-1] image, image_meta, gt_class_ids, gt_boxes, gt_masks = \ load_image_gt(image, mask_gt, boxes, class_ids, image_id, self.config, use_mini_mask=self.config.USE_MINI_MASK) #------------------------------# # 初始化用于训练的内容 #------------------------------# if i == 0: batch_image_meta = np.zeros( (self.batch_size, ) + image_meta.shape, dtype=image_meta.dtype) batch_rpn_match = np.zeros( [self.batch_size, self.anchors.shape[0], 1], dtype=np.int32) batch_rpn_bbox = np.zeros([ self.batch_size, self.config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4 ], dtype=np.float32) batch_images = np.zeros((self.batch_size, ) + image.shape, dtype=np.float32) batch_gt_class_ids = np.zeros( (self.batch_size, self.config.MAX_GT_INSTANCES), dtype=np.int32) batch_gt_boxes = np.zeros( (self.batch_size, self.config.MAX_GT_INSTANCES, 4), dtype=np.int32) batch_gt_masks = np.zeros( (self.batch_size, gt_masks.shape[0], gt_masks.shape[1], self.config.MAX_GT_INSTANCES), dtype=gt_masks.dtype) if not np.any(gt_class_ids > 0): continue # RPN Targets rpn_match, rpn_bbox = build_rpn_targets(image.shape, self.anchors, gt_class_ids, gt_boxes, self.config) #-----------------------------------------------------------------------# # 如果某张图片里面物体的数量大于最大值的话,则进行筛选,防止过大 #-----------------------------------------------------------------------# if gt_boxes.shape[0] > self.config.MAX_GT_INSTANCES: ids = np.random.choice(np.arange(gt_boxes.shape[0]), self.config.MAX_GT_INSTANCES, replace=False) gt_class_ids = gt_class_ids[ids] gt_boxes = gt_boxes[ids] gt_masks = gt_masks[:, :, ids] #------------------------------# # 将当前信息加载进batch #------------------------------# batch_image_meta[i] = image_meta batch_rpn_match[i] = rpn_match[:, np.newaxis] batch_rpn_bbox[i] = rpn_bbox batch_images[i] = preprocess_input(image.astype(np.float32)) batch_gt_class_ids[i, :gt_class_ids.shape[0]] = gt_class_ids batch_gt_boxes[i, :gt_boxes.shape[0]] = gt_boxes batch_gt_masks[i, :, :, :gt_masks.shape[-1]] = gt_masks return [batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox, batch_gt_class_ids, batch_gt_boxes, batch_gt_masks], \ [np.zeros(self.batch_size), np.zeros(self.batch_size), np.zeros(self.batch_size), np.zeros(self.batch_size), np.zeros(self.batch_size)]
def detect_image(self, image): image_shape = np.array(np.shape(image)[0:2]) #---------------------------------------------------------# # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB #---------------------------------------------------------# image = cvtColor(image) #---------------------------------------------------------# # 给图像增加灰条,实现不失真的resize # 也可以直接resize进行识别 #---------------------------------------------------------# image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image) #---------------------------------------------------------# # 添加上batch_size维度 #---------------------------------------------------------# image_data = np.expand_dims( np.transpose( preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) with torch.no_grad(): images = torch.from_numpy(image_data) if self.cuda: images = images.cuda() #---------------------------------------------------------# # 将图像输入网络当中进行预测! #---------------------------------------------------------# outputs = self.net(images) outputs = self.bbox_util.decode_box(outputs) #---------------------------------------------------------# # 将预测框进行堆叠,然后进行非极大抑制 #---------------------------------------------------------# results = self.bbox_util.non_max_suppression( torch.cat(outputs, 1), self.num_classes, self.input_shape, image_shape, self.letterbox_image, conf_thres=self.confidence, nms_thres=self.nms_iou) if results[0] is None: return image top_label = np.array(results[0][:, 6], dtype='int32') top_conf = results[0][:, 4] * results[0][:, 5] top_boxes = results[0][:, :4] #---------------------------------------------------------# # 设置字体与边框厚度 #---------------------------------------------------------# font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) thickness = int( max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1)) #---------------------------------------------------------# # 图像绘制 #---------------------------------------------------------# for i, c in list(enumerate(top_label)): predicted_class = self.class_names[int(c)] box = top_boxes[i] score = top_conf[i] top, left, bottom, right = box top = max(0, np.floor(top).astype('int32')) left = max(0, np.floor(left).astype('int32')) bottom = min(image.size[1], np.floor(bottom).astype('int32')) right = min(image.size[0], np.floor(right).astype('int32')) label = '{} {:.2f}'.format(predicted_class, score) draw = ImageDraw.Draw(image) label_size = draw.textsize(label, font) label = label.encode('utf-8') print(label, top, left, bottom, right) if top - label_size[1] >= 0: text_origin = np.array([left, top - label_size[1]]) else: text_origin = np.array([left, top + 1]) for i in range(thickness): draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c]) draw.rectangle( [tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c]) draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font) del draw return image
def detect_image(self, image): #---------------------------------------------------# # 获得输入图片的高和宽 #---------------------------------------------------# image_shape = np.array(np.shape(image)[0:2]) #---------------------------------------------------------# # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB #---------------------------------------------------------# image = cvtColor(image) #---------------------------------------------------------# # 给图像增加灰条,实现不失真的resize # 也可以直接resize进行识别 #---------------------------------------------------------# image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image) #---------------------------------------------------------# # 添加上batch_size维度,图片预处理,归一化。 #---------------------------------------------------------#BBoxUtility image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0) outputs = self.get_pred(image_data).numpy() #--------------------------------------------------------------------------------------------# # centernet后处理的过程,包括门限判断和传统非极大抑制。 # 对于centernet网络来讲,确立中心非常重要。对于大目标而言,会存在许多的局部信息。 # 此时大目标中心点比较难以确定。使用最大池化的非极大抑制方法无法去除局部框 # 这里面存在传统的nms处理方法,可以选择关闭和开启。 # 实际测试中,hourglass为主干网络时有无额外的nms相差不大,resnet相差较大。BBoxUtility #--------------------------------------------------------------------------------------------# results = self.bbox_util.postprocess(outputs, self.nms, image_shape, self.input_shape, self.letterbox_image, confidence=self.confidence) #--------------------------------------# # 如果没有检测到物体,则返回原图 #--------------------------------------# if results[0] is None: return image top_label = np.array(results[0][:, 5], dtype = 'int32') top_conf = results[0][:, 4] top_boxes = results[0][:, :4] #---------------------------------------------------------# # 设置字体与边框厚度 #---------------------------------------------------------# font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32')) thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0], 1) #---------------------------------------------------------# # 图像绘制 #---------------------------------------------------------# for i, c in list(enumerate(top_label)): predicted_class = self.class_names[int(c)] box = top_boxes[i] score = top_conf[i] top, left, bottom, right = box top = max(0, np.floor(top).astype('int32')) left = max(0, np.floor(left).astype('int32')) bottom = min(image.size[1], np.floor(bottom).astype('int32')) right = min(image.size[0], np.floor(right).astype('int32')) label = '{} {:.2f}'.format(predicted_class, score) draw = ImageDraw.Draw(image) label_size = draw.textsize(label, font) label = label.encode('utf-8') print(label, top, left, bottom, right) if top - label_size[1] >= 0: text_origin = np.array([left, top - label_size[1]]) else: text_origin = np.array([left, top + 1]) for i in range(thickness): draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c]) draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c]) draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font) del draw return image