def detect_image(self, image):
    """Detect objects in ``image`` and draw the labelled boxes onto it.

    The image is letterboxed to ``self.model_image_size``, scaled to
    [0, 1], run through ``self.net``, decoded by the three
    ``self.yolo_decodes`` heads and filtered by ``non_max_suppression``.
    Surviving boxes are mapped back onto the original image with
    ``yolo_correct_boxes`` and drawn together with a "class score" label.

    Args:
        image: Picture to annotate.  It is handed to ``ImageDraw.Draw``
            and modified in place (presumably a PIL image - confirm
            against the caller).

    Returns:
        The same ``image`` object.  It is returned untouched when
        ``non_max_suppression`` yields no detections.
    """
    img_h, img_w = np.shape(image)[0:2]
    image_shape = np.array([img_h, img_w])
    model_h, model_w = self.model_image_size[0], self.model_image_size[1]

    # Letterbox (pad instead of stretch) to the network input size.
    crop_img = np.array(letterbox_image(image, (model_w, model_h)))
    photo = np.array(crop_img, dtype=np.float32) / 255.0  # scale to [0, 1]
    photo = np.transpose(photo, (2, 0, 1))  # HWC -> CHW
    images = torch.from_numpy(np.asarray([photo]))  # add batch dimension
    if self.cuda:
        images = images.cuda()

    with torch.no_grad():
        outputs = self.net(images)
        # One decoder per output head.
        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(
            output,
            self.config["yolo"]["classes"],
            conf_thres=self.confidence,
            nms_thres=self.iou)

    # No detections (e.g. a ``None`` entry): hand the image back as is.
    # Only the "nothing found" failures are caught; real errors propagate.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        return image

    # Final score = column 4 * column 5; keep what clears the threshold.
    scores = batch_detections[:, 4] * batch_detections[:, 5]
    top_index = scores > self.confidence
    top_conf = scores[top_index]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

    # Remove the letterbox padding: boxes become relative to the original.
    boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                               np.array([model_h, model_w]), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * img_w + 0.5).astype('int32'))
    # Clamp to 1 so that small images still get a visible outline.
    thickness = max((img_h + img_w) // model_h, 1)

    draw = ImageDraw.Draw(image)
    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]
        color = self.colors[self.class_names.index(predicted_class)]

        # Grow the box by 5 px on each side, round, clip to the image.
        top, left, bottom, right = boxes[i]
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        if hasattr(draw, "textsize"):
            label_size = draw.textsize(label, font)
        else:
            # Pillow >= 10 dropped textsize(); textbbox() replaces it.
            _, _, text_w, text_h = draw.textbbox((0, 0), label, font=font)
            label_size = (text_w, text_h)
        print(label.encode('utf-8'))

        # Put the label above the box when there is room, else inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is drawn as nested 1-px rectangles.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset,
                 right - offset, bottom - offset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw
    return image
# Pick the tensor type once: CUDA floats when a GPU is available.
if torch.cuda.is_available():
    Tensor = torch.cuda.FloatTensor
else:
    Tensor = torch.FloatTensor

imgs, img_detections = [], []  # image paths / detections, index-aligned

print("\nPerforming object detection:")
prev_time = time.time()
for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
    # Cast the batch to the selected tensor type.
    input_imgs = Variable(input_imgs.type(Tensor))

    # Forward pass followed by NMS; no gradients are needed.
    with torch.no_grad():
        detections = non_max_suppression(model(input_imgs),
                                         opt.conf_thres, opt.nms_thres)

    # Report the wall-clock time spent since the previous batch finished.
    current_time = time.time()
    inference_time = datetime.timedelta(seconds=current_time - prev_time)
    prev_time = current_time
    print("\t+ Batch %d, Inference Time: %s" % (batch_i, inference_time))

    # Accumulate paths and detections in the same order.
    imgs += img_paths
    img_detections += detections

# Twenty evenly spaced colours from the "tab20b" colormap for the boxes.
cmap = plt.get_cmap("tab20b")
colors = list(map(cmap, np.linspace(0, 1, 20)))
def detect_image(self, image):
    """Detect objects in ``image`` and draw the labelled boxes onto it.

    Depending on ``self.letterbox_image`` the input is either letterboxed
    (padded, no distortion) or plainly resized to
    ``self.model_image_size``.  The two ``self.yolo_decodes`` heads are
    decoded, filtered by ``non_max_suppression`` and the surviving boxes
    are mapped back to original-image coordinates before drawing.

    Args:
        image: Picture to annotate.  ``convert``/``resize`` and
            ``ImageDraw.Draw`` are called on it, and it is modified in
            place (presumably a PIL image - confirm against the caller).

    Returns:
        The same ``image`` object.  It is returned untouched when
        ``non_max_suppression`` yields no detections.
    """
    img_h, img_w = np.shape(image)[0:2]
    image_shape = np.array([img_h, img_w])
    model_h, model_w = self.model_image_size[0], self.model_image_size[1]

    # Either pad to keep the aspect ratio, or resize directly.
    if self.letterbox_image:
        crop_img = np.array(letterbox_image(image, (model_w, model_h)))
    else:
        crop_img = image.convert('RGB')
        crop_img = crop_img.resize((model_w, model_h), Image.BICUBIC)

    photo = np.array(crop_img, dtype=np.float32) / 255.0  # scale to [0, 1]
    photo = np.transpose(photo, (2, 0, 1))  # HWC -> CHW
    images = [photo]  # add the batch dimension

    with torch.no_grad():
        images = torch.from_numpy(np.asarray(images))
        if self.cuda:
            images = images.cuda()

        # Forward pass, then decode each of the two output heads.
        outputs = self.net(images)
        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(2)]

        # Stack all predictions and run non-max suppression.
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)

    # No detections (e.g. a ``None`` entry): hand the image back as is.
    # Only the "nothing found" failures are caught; real errors propagate.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        return image

    # Final score = column 4 * column 5; keep what clears the threshold.
    scores = batch_detections[:, 4] * batch_detections[:, 5]
    top_index = scores > self.confidence
    top_conf = scores[top_index]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

    if self.letterbox_image:
        # Predictions are relative to the padded image; strip the padding.
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([model_h, model_w]),
                                   image_shape)
    else:
        # Plain resize: rescale each coordinate to the original size.
        top_xmin = top_xmin / model_w * image_shape[1]
        top_ymin = top_ymin / model_h * image_shape[0]
        top_xmax = top_xmax / model_w * image_shape[1]
        top_ymax = top_ymax / model_h * image_shape[0]
        boxes = np.concatenate([top_ymin, top_xmin, top_ymax, top_xmax],
                               axis=-1)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * img_w + 0.5).astype('int32'))
    thickness = max((img_h + img_w) // model_h, 1)

    draw = ImageDraw.Draw(image)
    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]
        color = self.colors[self.class_names.index(predicted_class)]

        # Grow the box by 5 px on each side, round, clip to the image.
        top, left, bottom, right = boxes[i]
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        if hasattr(draw, "textsize"):
            label_size = draw.textsize(label, font)
        else:
            # Pillow >= 10 dropped textsize(); textbbox() replaces it.
            _, _, text_w, text_h = draw.textbbox((0, 0), label, font=font)
            label_size = (text_w, text_h)
        print(label.encode('utf-8'), top, left, bottom, right)

        # Put the label above the box when there is room, else inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is drawn as nested 1-px rectangles.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset,
                 right - offset, bottom - offset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw
    return image
def detect_image(self, image):
    """Outline every detection in ``image`` and stamp a head-count banner.

    The image is letterboxed, run through ``self.net``, decoded by the
    three ``self.yolo_decodes`` heads and filtered by
    ``non_max_suppression`` (NMS threshold fixed at 0.3).  Each surviving
    box is outlined; per-box text labels are not drawn.  When at least one
    box survives, a yellow banner with the number of boxes is drawn at the
    top-left corner.

    Args:
        image: Picture to annotate.  ``image.size`` is read and
            ``ImageDraw.Draw`` is applied, so it is modified in place
            (presumably a PIL image - confirm against the caller).

    Returns:
        The same ``image`` object.  It is returned untouched when
        ``non_max_suppression`` yields no detections.
    """
    img_h, img_w = np.shape(image)[0:2]
    image_shape = np.array([img_h, img_w])
    model_h, model_w = self.model_image_size[0], self.model_image_size[1]

    # NOTE(review): the size is passed as (size[0], size[1]) here; this
    # only matters for non-square network inputs - confirm the order.
    crop_img = np.array(letterbox_image(image, (model_h, model_w)))
    photo = np.array(crop_img, dtype=np.float32) / 255.0  # scale to [0, 1]
    photo = np.transpose(photo, (2, 0, 1))  # HWC -> CHW
    images = np.asarray([photo])  # add the batch dimension

    with torch.no_grad():
        images = torch.from_numpy(images)
        if self.cuda:
            images = images.cuda()
        outputs = self.net(images)
        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)

    # No detections (e.g. a ``None`` entry): hand the image back as is.
    # Only the "nothing found" failures are caught; real errors propagate.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        return image

    # Final score = column 4 * column 5; keep what clears the threshold.
    scores = batch_detections[:, 4] * batch_detections[:, 5]
    top_index = scores > self.confidence
    top_conf = scores[top_index]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

    # Remove the letterbox padding: boxes become relative to the original.
    boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                               np.array([model_h, model_w]), image_shape)

    # Clamp to 1 so that small images still get a visible outline.
    thickness = max((img_h + img_w) // model_h, 1)
    print('画面中有{}个人'.format(len(boxes)))
    font_cn = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))

    draw = ImageDraw.Draw(image)
    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]
        color = self.colors[self.class_names.index(predicted_class)]

        # Grow the box by 5 px on each side, round, clip to the image.
        top, left, bottom, right = boxes[i]
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        print(label.encode('utf-8'))

        # A thick outline is drawn as nested 1-px rectangles.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset,
                 right - offset, bottom - offset],
                outline=color)

    # The banner is identical for every box, so draw it once, last, to
    # keep it on top of all outlines.
    if len(top_label) > 0:
        show_str = ' 画面中有' + str(len(boxes)) + '个人 '
        if hasattr(draw, "textsize"):
            label_size1 = draw.textsize(show_str, font_cn)
        else:
            # Pillow >= 10 dropped textsize(); textbbox() replaces it.
            _, _, text_w, text_h = draw.textbbox((0, 0), show_str,
                                                 font=font_cn)
            label_size1 = (text_w, text_h)
        print(label_size1)
        draw.rectangle([10, 10, 10 + label_size1[0], 10 + label_size1[1]],
                       fill=(255, 255, 0))
        draw.text((10, 10), show_str, fill=(0, 0, 0), font=font_cn)
    del draw
    return image
def detect_image(self, image_id, image):
    """Run detection on ``image`` and dump the boxes to a text file.

    One line per detection is written to
    ``./input/detection-results/<image_id>.txt`` in the form
    ``class score left top right bottom``.  The file is always created,
    and stays empty when nothing is detected.

    Side effect: ``self.confidence`` is overwritten with 0.05
    (presumably so low-score boxes are kept for mAP evaluation - confirm).

    Args:
        image_id: Name (without extension) used for the result file.
        image: Picture handed to ``letterbox_image``.

    Returns:
        ``image`` when ``non_max_suppression`` yields no detections,
        otherwise ``None`` (kept as in the original interface).
    """
    self.confidence = 0.05
    model_h, model_w = self.model_image_size[0], self.model_image_size[1]

    # ``with`` guarantees the file is closed on the early return below and
    # when inference raises.
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])
        crop_img = np.array(letterbox_image(image, (model_h, model_w)))
        photo = np.array(crop_img, dtype=np.float32) / 255.0  # to [0, 1]
        photo = np.transpose(photo, (2, 0, 1))  # HWC -> CHW
        images = torch.from_numpy(np.asarray([photo]))  # add batch dim
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i])
                           for i in range(3)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=0.3)

        # No detections (e.g. a ``None`` entry): leave the file empty.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        # Final score = column 4 * column 5; keep what clears the threshold.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = (
            np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

        # Remove the letterbox padding.
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([model_h, model_w]),
                                   image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            # The score is truncated to its first six characters.
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)),
                     str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect():
    """Run the detector over a dataset and save/show the annotated images.

    All settings come from the module-level ``opt`` namespace.  Each batch
    goes through the ``Darknet`` model and ``non_max_suppression``; boxes
    are rescaled from network-input to original-image coordinates and
    drawn with ``plot_one_box``.  Depending on the module-level ``SAVE``
    and ``SHOW`` flags, every image is written to ``opt.dst_path`` and/or
    displayed with OpenCV.
    """
    # 0. Read the options.
    cfg = opt.cfg
    weights = opt.weights
    src_txt_path = opt.src_txt_path
    img_size = opt.img_size
    batch_size = opt.batch_size
    dst_path = opt.dst_path
    # makedirs also creates missing parents and tolerates an existing dir.
    os.makedirs(dst_path, exist_ok=True)

    device = select_device(opt.device)
    classes = load_classes(parse_data_cfg(opt.data)['names'])

    # 1. Build the network and load the weights.
    model = Darknet(cfg)
    if weights.endswith('.pt'):  # TODO: support the .weights format
        # map_location lets a GPU-saved checkpoint load on a CPU-only host.
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    model.to(device).eval()

    # 2. Build the dataset and loader.
    test_dataset = VocDataset(src_txt_path, img_size, with_label=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn)  # TODO

    # 3. Inference.
    start = time.time()
    pbar = tqdm(dataloader)
    for i, (img_tensor, img0, img_name) in enumerate(pbar):
        pbar.set_description("Already Processed %d image: " % (i + 1))
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)

        # Inference only: avoid building autograd graphs.
        with torch.no_grad():
            # rows: (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
            output = model(img_tensor)[0]
            nms_output = non_max_suppression(output, opt.conf_thres,
                                             opt.nms_thres)

        new_h, new_w = img_tensor.size(2), img_tensor.size(3)

        # Visualise the detections of each image in the batch.
        # NOTE(review): the source layout is ambiguous; saving/showing is
        # done for every image, with or without detections - confirm.
        for batch_idx, det in enumerate(nms_output):
            frame = img0[batch_idx]
            if det is not None:
                # Scale factors from the resized input back to the original.
                orig_h, orig_w = frame.shape[:2]
                ratio_h = orig_h / new_h
                ratio_w = orig_w / new_w
                for *box, conf, _, cls in det:
                    x1 = int(ratio_w * box[0])
                    y1 = int(ratio_h * box[1])
                    x2 = int(ratio_w * box[2])
                    y2 = int(ratio_h * box[3])
                    label = '%s %.2f' % (classes[int(cls)], conf)
                    # A "TypeError: an integer is required (got type
                    # tuple)" here usually means img_tensor was passed
                    # instead of the original image.
                    plot_one_box([x1, y1, x2, y2], frame,
                                 label=label, color=(255, 0, 0))

            if SAVE:
                cv2.imwrite(os.path.join(dst_path, img_name[batch_idx]),
                            frame)
            if SHOW:
                cv2.imshow('aaa', frame)
                cv2.waitKey(0)

    print('time use: %.3fs' % (time.time() - start))
def get_FPS(self, image, test_interval):
    """Measure the average time of one full detection pass on ``image``.

    The image is preprocessed once.  One untimed warm-up pass is run, then
    ``test_interval`` timed passes.  Each pass covers the network forward,
    decoding of the two heads, non-max suppression and box correction; no
    drawing is done.

    Args:
        image: Picture handed to ``letterbox_image``.
        test_interval: Number of timed passes; must be non-zero because
            the elapsed time is divided by it.

    Returns:
        Mean seconds per pass, ``(t2 - t1) / test_interval``.
    """
    # Preprocess the image to the network input format.
    image_shape = np.array(np.shape(image)[0:2])
    model_h, model_w = self.model_image_size[0], self.model_image_size[1]
    crop_img = np.array(letterbox_image(image, (model_w, model_h)))
    photo = np.array(crop_img, dtype=np.float32) / 255.0  # scale to [0, 1]
    photo = np.transpose(photo, (2, 0, 1))  # HWC -> CHW
    images = np.asarray([photo])  # add the batch dimension

    with torch.no_grad():
        images = torch.from_numpy(images)
        if self.cuda:
            images = images.cuda()

    def run_once():
        """Run forward + decode + NMS + box correction, discarding results."""
        with torch.no_grad():
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i])
                           for i in range(2)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output,
                len(self.class_names),
                conf_thres=self.confidence,
                nms_thres=self.iou)
        try:
            detections = batch_detections[0].cpu().numpy()
            scores = detections[:, 4] * detections[:, 5]
            top_index = scores > self.confidence
            # The unused results below are computed on purpose so the
            # timed workload matches a real detection pass.
            top_conf = scores[top_index]
            top_label = np.array(detections[top_index, -1], np.int32)
            top_bboxes = np.array(detections[top_index, :4])
            top_xmin, top_ymin, top_xmax, top_ymax = (
                np.expand_dims(top_bboxes[:, k], -1) for k in range(4))
            # Remove the letterbox padding.
            yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                               np.array([model_h, model_w]), image_shape)
        except Exception:
            # Best effort: an image without detections must not abort the
            # benchmark.  KeyboardInterrupt/SystemExit still propagate.
            pass

    run_once()  # warm-up pass, not timed

    t1 = time.time()
    for _ in range(test_interval):
        run_once()
    t2 = time.time()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def forward(self, x):
    """Apply non-max suppression to the first element of ``x``.

    The thresholds and class filter are taken from ``self.conf``,
    ``self.iou`` and ``self.classes``.

    Returns:
        Whatever ``non_max_suppression`` returns for ``x[0]``.
    """
    raw_predictions = x[0]
    nms_options = dict(conf_thres=self.conf,
                       iou_thres=self.iou,
                       classes=self.classes)
    return non_max_suppression(raw_predictions, **nms_options)
def validation_step(self, opt, outputs, batch, batch_idx, epoch):
    """Score one validation batch and accumulate per-image statistics.

    Computes the loss components, runs non-max suppression on the
    inference output and, for every image in the batch, marks each
    prediction as correct or not at each IoU threshold in ``self.iouv``.
    One ``(correct, conf, pcls, tcls)`` tuple per image is appended to
    ``self.stats`` and ``self.seen`` is incremented.

    Args:
        opt: Options object; ``conf_thres`` and ``iou_thres`` are read.
        outputs: Pair ``(inf_out, train_out)`` produced by the model.
        batch: Tuple ``(imgs, targets, paths, shapes, pad)``.
        batch_idx: Index of the batch, forwarded to ``self.calc_ni``.
        epoch: Current epoch, forwarded to ``self.calc_ni``.

    Returns:
        The first three loss components (GIoU, obj, cls) taken from
        element 1 of the ``compute_loss`` result.
    """
    imgs, targets, paths, shapes, pad = batch
    _, _, height, width = imgs.shape
    inf_out, train_out = outputs
    # Scale factors that turn normalised xyxy boxes into pixels.
    whwh = torch.Tensor([width, height, width, height]).to(imgs.device)

    losses = compute_loss(train_out, targets,
                          self.model)[1][:3]  # GIoU, obj, cls
    output = non_max_suppression(
        inf_out,
        conf_thres=opt.conf_thres,
        iou_thres=opt.iou_thres,
        multi_label=self.calc_ni(batch_idx, epoch) > self.n_burn)

    # Statistics per image
    for si, pred in enumerate(output):
        labels = targets[targets[:, 0] == si, 1:]
        nl = len(labels)
        tcls = labels[:, 0].tolist() if nl else []  # target class
        self.seen += 1

        if pred is None:
            # No predictions: targets (if any) all count as missed.
            if nl:
                self.stats.append(
                    (torch.zeros(0, self.niou, dtype=torch.bool),
                     torch.Tensor(), torch.Tensor(), tcls))
            continue

        # Clip boxes to image bounds
        clip_coords(pred, (height, width))

        # Assign all predictions as incorrect
        correct = torch.zeros(pred.shape[0],
                              self.niou,
                              dtype=torch.bool,
                              device=imgs.device)
        if nl:
            detected = []  # target indices
            tcls_tensor = labels[:, 0]

            # target boxes
            tbox = xywh2xyxy(labels[:, 1:5]) * whwh

            # Move the IoU thresholds to the prediction device once, so
            # both comparisons below run on matching devices.
            iouv = self.iouv.to(pred.device)

            # Per target class
            for cls in torch.unique(tcls_tensor):
                ti = (cls == tcls_tensor).nonzero().view(-1)  # target idx
                pi = (cls == pred[:, 5]).nonzero().view(-1)  # pred idx

                # Search for detections
                if pi.shape[0]:
                    # Best IoU (and its target index) per prediction.
                    ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)

                    # Append detections
                    for j in (ious > iouv[0]).nonzero():
                        d = ti[i[j]]  # detected target
                        if d not in detected:
                            detected.append(d)
                            correct[pi[j]] = ious[j] > iouv  # 1 x niou
                            if len(detected) == nl:
                                # all targets already located in image
                                break

        # Append statistics (correct, conf, pcls, tcls)
        self.stats.append(
            (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

    return losses
def main():
    """Run YOLOv3-SPP on ``test.jpg`` and save the annotated result.

    Loads the model config, weights and class-name JSON, runs a single
    forward pass plus non-max suppression, rescales the boxes to the
    original image, draws them with ``draw_box``, shows the result with
    matplotlib and saves it as ``test_result.jpg``.
    """
    img_size = 512  # must be a multiple of 32, e.g. 416, 512, 608
    cfg = "cfg/my_yolov3.cfg"  # generated .cfg file
    weights = "weights/yolov3spp-voc-{}.pth".format(img_size)  # trained weights
    json_path = "./data/pascal_voc_classes.json"  # class-label json
    img_path = "test.jpg"
    assert os.path.exists(cfg), "cfg file {} dose not exist.".format(cfg)
    assert os.path.exists(weights), "weights file {} dose not exist.".format(
        weights)
    assert os.path.exists(json_path), "json file {} dose not exist.".format(
        json_path)
    assert os.path.exists(img_path), "image file {} dose not exist.".format(
        img_path)

    with open(json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    # Invert the mapping: the json's values become the lookup keys.
    category_index = {v: k for k, v in class_dict.items()}

    input_size = (img_size, img_size)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = YOLOV3_SPP(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)
    model.eval()

    with torch.no_grad():
        # Warm-up pass on a dummy input.
        img = torch.zeros((1, 3, img_size, img_size), device=device)
        model(img)

        img_o = cv2.imread(img_path)  # BGR
        assert img_o is not None, "Image Not Found " + img_path

        img = img_utils.letterbox(img_o,
                                  new_shape=input_size,
                                  auto=True,
                                  color=(0, 0, 0))[0]
        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device).float()
        img /= 255.0  # scale (0, 255) to (0, 1)
        img = img.unsqueeze(0)  # add batch dimension

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get inference result
        t2 = torch_utils.time_synchronized()
        print(t2 - t1)

        pred = utils.non_max_suppression(pred,
                                         conf_thres=0.1,
                                         iou_thres=0.6,
                                         multi_label=True)[0]
        t3 = time.time()
        print(t3 - t2)

        # NOTE(review): the NMS helper presumably yields None for an image
        # without detections; guard so the slicing below cannot fail.
        if pred is None:
            print("No target detected!")
            return

        # Rescale the boxes from the network input to the original image.
        pred[:, :4] = utils.scale_coordinates(pred[:, :4], img.shape[2:],
                                              img_o.shape).round()
        print(pred.shape)

        bboxes = pred[:, :4].detach().cpu().numpy()
        scores = pred[:, 4].detach().cpu().numpy()
        # ``np.int`` was removed in NumPy 1.24; the builtin is equivalent.
        # +1 shifts the predicted class index to match category_index.
        classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1

        img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores,
                         category_index)
        plt.imshow(img_o)
        plt.show()

        img_o.save("test_result.jpg")
def predict(
    data_dir: Union[str, os.PathLike],
    weights: Union[str, os.PathLike],
    batch_size: Optional[int] = 8,
    num_workers: Optional[int] = 1,
    resize: Optional[Union[int, Tuple[int, int]]] = None,
    file_ext: Optional[str] = "jpg",
    confidence: Optional[float] = 0.5,
    nms_threshold: Optional[float] = 0.5,
    output_path: Union[str, os.PathLike] = "../",
):
    """Run the detector over a directory and dump predictions as JSON.

    Args:
        data_dir: Directory handed to ``YoloDataset``.
        weights: Checkpoint handed to ``attempt_load``.
        batch_size: Batch size of the data loader.
        num_workers: Worker processes of the data loader.
        resize: Optional resize forwarded to ``YoloDataset``.
        file_ext: Image file extension forwarded to ``YoloDataset``.
        confidence: Confidence threshold for non-max suppression.
        nms_threshold: IoU threshold for non-max suppression.
        output_path: Either a ``*.json`` file path whose parent directory
            exists, or an existing directory in which a timestamped
            ``yolov5_predictions_<epoch>.json`` is created.

    Raises:
        IOError: If ``output_path`` is neither of the above.  This is
            checked before any inference work starts.
    """
    # Validate the destination first so a bad path fails immediately
    # rather than after the whole dataset has been processed.
    output_path = os.fspath(output_path)
    if output_path.endswith(".json"):
        # A bare "x.json" has an empty dirname, meaning the cwd.
        if not os.path.isdir(os.path.dirname(output_path) or "."):
            raise IOError(
                f"{Fore.RED} no such directory {os.path.dirname(output_path)} {Style.RESET_ALL}"
            )
        output_file = output_path
    elif os.path.isdir(output_path):
        output_file = None  # timestamped name is built at write time
    else:
        raise IOError(
            f"{Fore.RED} no such directory {os.path.dirname(output_path)} {Style.RESET_ALL}"
        )

    dataset = YoloDataset(data_dir, file_ext=file_ext, resize=resize)
    loader = DataLoader(dataset,
                        batch_size=batch_size,
                        pin_memory=True,
                        num_workers=num_workers)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"using device {device}")
    print("loading model...")
    model = attempt_load(weights, map_location=device)
    model.eval()

    predictions = []
    for batch in tqdm(loader):
        batch = batch.to(device)
        with torch.no_grad():
            pred = model(batch, augment=False)[0]
            pred = non_max_suppression(pred,
                                       confidence,
                                       nms_threshold,
                                       classes=None,
                                       agnostic=False)
        predictions.extend([to_cpu(p) for p in pred])

    # Post-process every image's predictions in parallel.
    predictions = Parallel(n_jobs=os.cpu_count(), backend="multiprocessing")(
        delayed(postprocess)(p) for p in tqdm(predictions))

    if output_file is None:
        output_file = os.path.join(
            output_path,
            "yolov5_predictions_" + str(time.time()).split(".")[0] + ".json")

    # Predictions are paired with file names by position.
    filenames = dataset.filenames
    output_dict = dict(zip(filenames, predictions))

    with open(output_file, "w") as f:
        json.dump(output_dict, f, indent=2)
def detect_onnx(official=True, image_path=None):
    """Run the ONNX model in ``./weights/best.onnx`` on one image.

    The input size is read from the ONNX session, the image is
    letterboxed to it and scaled to [0, 1].  With ``official`` set and
    four model outputs, the first output is passed straight to
    ``non_max_suppression``.  Otherwise the three raw feature maps are
    decoded by hand (sigmoid, grid offset, anchor scaling) and filtered
    with ``w_non_max_suppression``.

    Args:
        official: Prefer the already-decoded output when the model has one.
        image_path: Path of the image to open.

    Returns:
        The result of whichever NMS routine was used.
    """
    num_classes = 80
    # Anchor (w, h) pairs, one row per output scale.
    anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119],
               [116, 90, 156, 198, 373, 326]]  # 5s

    session = onnxruntime.InferenceSession('./weights/best.onnx')
    input_shape = session.get_inputs()[0].shape
    batch_size = input_shape[0]
    img_size_h = input_shape[2]
    img_size_w = input_shape[3]

    # Build the network input: letterbox, HWC -> CHW, add batch, scale.
    image_src = Image.open(image_path)
    resized = letterbox_image(image_src, (img_size_w, img_size_h))
    img_in = np.transpose(resized, (2, 0, 1)).astype(np.float32)
    img_in = np.expand_dims(img_in, axis=0)
    img_in /= 255.0

    # Inference.
    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: img_in})

    # Four outputs means outputs[0] is the decoded (1, N, 85) tensor
    # (model.model[-1].export = False); use the official NMS on it.
    if official and len(outputs) == 4:
        decoded = torch.from_numpy(np.array(outputs[0]))
        return non_max_suppression(decoded,
                                   conf_thres=0.4,
                                   iou_thres=0.5,
                                   agnostic=False)

    # Manual decoding of the raw heads, e.g. (1, 3, 20, 20, 85),
    # (1, 3, 40, 40, 85), (1, 3, 80, 80, 85); partly based on the
    # Detect layer in yolo.py.
    anchor_pairs = torch.tensor(anchors).float().view(3, -1, 2)
    anchor_grid = anchor_pairs.clone().view(3, 1, -1, 1, 1, 2)
    if len(outputs) == 4:
        outputs = outputs[1:4]  # drop the already-decoded tensor

    boxs = []
    for index, raw in enumerate(outputs):
        out = torch.from_numpy(raw)
        # NOTE(review): dim 2 is named "w" and dim 3 "h" as in the
        # original; confirm the axis order for non-square inputs.
        feature_w = out.shape[2]
        feature_h = out.shape[3]

        # Zoom factor between this feature map and the network input.
        stride_w = int(img_size_w / feature_w)
        stride_h = int(img_size_h / feature_h)

        grid_x, grid_y = np.meshgrid(np.arange(feature_w),
                                     np.arange(feature_h))

        # cx, cy, w, h
        pred_boxes = torch.FloatTensor(out[..., :4].shape)
        pred_boxes[..., 0] = (torch.sigmoid(out[..., 0]) * 2.0 - 0.5 +
                              grid_x) * stride_w  # cx
        pred_boxes[..., 1] = (torch.sigmoid(out[..., 1]) * 2.0 - 0.5 +
                              grid_y) * stride_h  # cy
        pred_boxes[..., 2:4] = (torch.sigmoid(out[..., 2:4]) *
                                2)**2 * anchor_grid[index]  # wh

        conf = torch.sigmoid(out[..., 4])
        pred_cls = torch.sigmoid(out[..., 5:])

        # Flatten to (batch, boxes, 4 + 1 + num_classes).
        flat = torch.cat((pred_boxes.view(batch_size, -1, 4),
                          conf.view(batch_size, -1, 1),
                          pred_cls.view(batch_size, -1, num_classes)), -1)
        boxs.append(flat)

    outputx = torch.cat(boxs, 1)
    # NMS
    return w_non_max_suppression(outputx,
                                 num_classes,
                                 conf_thres=0.4,
                                 nms_thres=0.3)
def test(cfg,
         data,
         batch_size,
         img_size,
         conf_thres,
         iou_thres,
         nms_thres,
         src_txt_path,
         weights,
         log_file_path=None,
         model=None):
    """Evaluate a Darknet model on a VOC-style dataset.

    Runs the model and ``non_max_suppression`` over the whole dataset,
    marks each prediction as correct when it matches a not-yet-detected
    target of the same class with IoU above ``iou_thres``, and computes
    precision, recall, AP and F1 per class with ``ap_per_class``.  The
    summary is printed and, when this module is imported rather than run
    as a script, also written with ``write_to_file``.

    Args:
        cfg: Model config used when ``model`` is None.
        data: Data config path; ``classes`` and ``names`` are read from it.
        batch_size: Batch size of the data loader.
        img_size: Image size forwarded to ``VocDataset``.
        conf_thres: Confidence threshold for NMS.
        iou_thres: IoU above which a prediction counts as correct.
        nms_thres: IoU threshold for NMS.
        src_txt_path: Image list forwarded to ``VocDataset``.
        weights: Checkpoint path; loaded only if it ends with ``.pt``.
        log_file_path: Destination forwarded to ``write_to_file``.
        model: Optional ready model; its own device is used.

    Returns:
        ``((mp, mr, map, mf1), maps)``: the mean metrics, and an array of
        per-class AP of length ``nc`` (classes without AP get the mean).
    """
    # 0. Read the data config.
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    names = load_classes(data['names'])

    # 1. Build the network unless one was passed in.
    if model is not None:
        device = next(model.parameters()).device  # get model device
    else:
        device = select_device('0')
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: support the .weights format
            checkpoint = torch.load(weights, map_location=device)
            model.load_state_dict(checkpoint['model'])
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # multi-GPU
    model.to(device).eval()

    # 2. Build the dataset and loader.
    test_dataset = VocDataset(src_txt_path,
                              img_size,
                              with_label=True,
                              is_training=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn,  # TODO
        pin_memory=True)

    # 3. Inference.
    image_nums = 0
    # NOTE(review): this table header is overwritten by the timing string
    # inside the loop, so the log gets the last timing line instead of
    # the header - confirm which was intended.
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@{}'.format(iou_thres), 'F1')
    p = r = f1 = mp = mr = mean_ap = mf1 = 0.
    jdict, stats, ap, ap_class = [], [], [], []
    pbar = tqdm(dataloader)
    for img_tensor, target_tensor, _, _ in pbar:
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)
        target_tensor = target_tensor.to(device)
        height, width = img_tensor.shape[2:]

        start = time.time()
        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            # rows: (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
            output = model(img_tensor)
            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres, nms_thres)
            s = 'time use per batch: %.3fs' % (time.time() - start)
        pbar.set_description(s)

        for batch_idx, pred in enumerate(nms_output):
            # Targets of this image: rows whose first column is its index.
            labels = target_tensor[target_tensor[:, 0] == batch_idx, 1:]
            nl = len(labels)  # number of targets
            tcls = labels[:, 0].tolist() if nl else []  # target class
            image_nums += 1

            # No prediction box at all, e.g. conf threshold too high.
            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds (labels are clipped as well).
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # Target boxes, scaled from normalised to pixel units.
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= img_tensor[batch_idx].size()[2]  # w
                tbox[:, [1, 3]] *= img_tensor[batch_idx].size()[1]  # h

                # Search for correct predictions
                for pred_i, (*pbox, pconf, pcls_conf,
                             pcls) in enumerate(pred):
                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and same-class targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # Correct if IoU clears the threshold and the target
                    # has not been matched yet.
                    if iou > iou_thres and m[bi] not in detected:
                        correct[pred_i] = 1
                        detected.append(m[bi])

            # Append statistics (correct, conf, pcls, tcls), e.g.
            #   correct: [1, 0, 1, 0, 0, 1, 0, 0, 1]
            #   conf:    tensor([0.17245, 0.14642, ...])
            #   pcls:    tensor([2., 2., ...])
            #   tcls:    [2.0, 2.0, 2.0, 2.0, 2.0]
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))

    # Compute statistics over all images.
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, mean_ap, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10s' + '%10.3g' * 5  # print format
    pf_value = pf % ('all', str(image_nums), nt.sum(), mp, mr, mean_ap, mf1)
    print(pf_value)
    if __name__ != '__main__':
        write_to_file(s, log_file_path)
        write_to_file(pf_value, log_file_path)

    results = [{"all": (mp, mr, mean_ap, mf1)}]

    # Print results per class
    if nc > 1 and len(stats):
        for idx, c in enumerate(ap_class):
            row = pf % (names[c], '', nt[c], p[idx], r[idx], ap[idx],
                        f1[idx])
            print(row)
            if __name__ != '__main__':
                write_to_file(row, log_file_path)
            results.append({names[c]: (p[idx], r[idx], ap[idx], f1[idx])})

    # Return results: classes without an AP fall back to the mean AP.
    maps = np.zeros(nc) + mean_ap
    for idx, c in enumerate(ap_class):
        maps[c] = ap[idx]
    return (mp, mr, mean_ap, mf1), maps
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3 and evaluate it.

    Loads (or builds) the engine through ``get_engine``, feeds every batch of
    the validation set named in ``./data/unrel.data`` through it, decodes the
    raw feature maps with ``PostprocessYOLO``, applies NMS and accumulates
    per-image statistics.  Finally prints precision / recall / mAP@0.5 / F1
    for all classes and per class, plus average per-image timings.

    All paths, thresholds and tensor shapes are hard-coded below.  The
    function takes no arguments and returns nothing.
    """
    onnx_file_path = './yolov3.onnx'
    engine_file_path = "yolov3.trt"
    data_path = "./data/unrel.data"

    data = parse_data_cfg(data_path)
    nc = int(data['classes'])  # number of classes
    path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names
    # IoU vector; with a single step this is just tensor([0.5]).
    iouv = torch.linspace(0.5, 0.95, 1, dtype=torch.float32)
    niou = iouv.numel()
    conf_thres = 0.001
    iou_thres = 0.6
    verbose = True

    # Generate custom dataloader
    img_size = 448
    batch_size = 16
    dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True)
    batch_size = min(batch_size, len(dataset))
    dataloader = data_loader(dataset, batch_size, img_size)

    # Output shapes expected by the post-processor.
    # NOTE(review): the leading 16 is hard-coded; a smaller (e.g. last) batch
    # would presumably not reshape cleanly -- confirm against the engine.
    output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)]

    # The post-processor configuration does not depend on the batch, so it is
    # built once instead of once per iteration.
    postprocessor_args = {
        # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # A list of 9 two-dimensional tuples for the YOLO anchors
        "yolo_anchors": [
            (10, 13), (16, 30), (33, 23),
            (30, 61), (62, 45), (59, 119),
            (116, 90), (156, 198), (373, 326),
        ],
        "num_classes": 37,
        "stride": [32, 16, 8],
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Fallback values used by the summary line when no statistics exist.
    p, r, f1, mp, mr, mean_ap, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    stats, ap_class = [], []
    seen = 0

    # Do inference with TensorRT
    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                     'mAP@0.5', 'F1')
        pbar = tqdm.tqdm(dataloader, desc=s)
        for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar):
            imgs = imgs.astype(np.float32) / 255.0
            _, _, height, width = imgs.shape  # batch, channels, height, width
            whwh = np.array([width, height, width, height])
            inputs[0].host = imgs

            # Network forward pass + YOLO decoding (timed as "inference").
            t = time.time()
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]
            # Crop each feature map to the part that corresponds to the actual
            # (rectangular) batch size: scale i has stride 32 / 2**i.
            trt_outputs = [
                np.ascontiguousarray(
                    otpt[:, :, :int(imgs.shape[2] * (2**i) / 32),
                         :int(imgs.shape[3] * (2**i) / 32)],
                    dtype=np.float32)
                for i, otpt in enumerate(trt_outputs)
            ]
            output_list = postprocessor.process(trt_outputs)
            t0 += time.time() - t

            inf_out = torch.cat(output_list, 1)
            t = time.time()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres)  # nms
            t1 += time.time() - t

            # Statistics per image
            for si, pred in enumerate(output):
                labels = targets[targets[:, 0] == si, 1:]
                nl = len(labels)
                tcls = labels[:, 0].tolist() if nl else []  # target class
                seen += 1

                if pred is None:
                    # No predictions: only record the image if it had targets
                    # (they all count as misses).
                    if nl:
                        stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                      torch.Tensor(), torch.Tensor(), tcls))
                    continue

                # Assign all predictions as incorrect
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
                if nl:
                    detected = []  # target indices already matched
                    tcls_tensor = labels[:, 0]

                    # target boxes, scaled from normalised to pixel units
                    tbox = xywh2xyxy(labels[:, 1:5]) * whwh
                    tbox = tbox.type(torch.float32)

                    # Per target class
                    for cls in torch.unique(tcls_tensor):
                        ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                        pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                        # Search for detections
                        if pi.shape[0]:
                            # Best IoU (and its target index) per prediction
                            ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)

                            # Append detections
                            for j in (ious > iouv[0]).nonzero():
                                d = ti[i[j]]  # detected target
                                if d not in detected:
                                    detected.append(d)
                                    correct[pi[j]] = ious[j] > iouv  # iouv is 1xn
                                    if len(detected) == nl:
                                        # all targets already located in image
                                        break

                # Append statistics (correct, conf, pcls, tcls)
                stats.append((correct.cpu(), pred[:, 4].cpu(),
                              pred[:, 5].cpu(), tcls))

            # Plot images (first batch only)
            if batch_i < 1:
                f = 'test_batch%g_gt.jpg' % batch_i  # filename
                plot_images(imgs, targets, paths=paths, names=names,
                            fname=f)  # ground truth
                f = 'test_batch%g_pred.jpg' % batch_i
                plot_images(imgs, output_to_target(output, width, height),
                            paths=paths, names=names, fname=f)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        if niou > 1:
            # [P, R, mAP@0.5:0.95, mAP@0.5]
            p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(1), ap[:, 0]
        mp, mr, mean_ap, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, mean_ap, mf1))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Print speeds
    if verbose:
        # max(seen, 1) keeps the report from dividing by zero when the
        # dataloader yielded no images.
        per_image = max(seen, 1)
        t = tuple(x / per_image * 1E3 for x in (t0, t1, t0 + t1)) + (
            img_size, img_size, batch_size)
        print(
            'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
            % t)
# Fragment: preprocess `image`, run the network + NMS, then start drawing the
# detections on `orig_image`.  `net`, `device`, `orig_image`, `scale_w`,
# `scale_h` and `point_num` are defined outside this excerpt.
image = image[...,::-1]  # reverse the last axis (presumably BGR -> RGB; confirm)
image = image.astype(np.float64)
# image = (image - hyp['mean']) / hyp['std']
image /= 255.0  # scale pixel values by 1/255
image = np.transpose(image, [2, 0, 1])  # move the last axis first (HWC -> CHW)
image = np.expand_dims(image, axis=0)  # add a leading batch axis
image = torch.from_numpy(image)
image = image.to(device).float()
pred = net(image)[0]
pred = non_max_suppression(pred,0.25, 0.35, multi_label=False, classes=0, agnostic= False,land=True ,point_num= point_num)
try:
    det = pred[0].cpu().detach().numpy()
    orig_image = orig_image.astype(np.uint8)
    # Divide box corners (columns 0-3) and the point_num (x, y) landmark pairs
    # (columns 5 .. 5 + 2 * point_num) by the resize scale factors.
    det[:,:4] = det[:,:4] / np.array([scale_w, scale_h] * 2)
    det[:,5:5+point_num*2] = det[:,5:5+point_num*2] / np.array([scale_w, scale_h] * point_num)
except:
    # NOTE(review): bare except -- presumably meant for "pred[0] is None"
    # (no detections), but it also hides every other error in the try body.
    det = []
for b in det:
    text = "{:.4f}".format(b[4])  # column 4 formatted as the score text
    b = list(map(int, b))  # truncate every field to int for drawing
    cv2.rectangle(orig_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
    cx = b[0]
def detect_image(self, image):
    """Detect objects in ``image``, draw them on it and return the boxes.

    The image is letterboxed to ``self.model_image_size``, scaled to [0, 1],
    run through ``self.net``, decoded by the three ``self.yolo_decodes``
    heads and filtered with non-maximum suppression.  Surviving boxes are
    mapped back to the original image with ``yolo_correct_boxes`` and drawn
    (with a ``"<class> <score>"`` label) directly onto ``image``.

    Args:
        image: image to annotate; it is passed to ``ImageDraw.Draw``, so a
            PIL image is expected.  It is modified in place.

    Returns:
        ``(image, boxes)`` where each row of ``boxes`` unpacks as
        ``top, left, bottom, right``.

        NOTE(review): when NMS yields no detections only ``image`` is
        returned (not a tuple).  This is kept for backward compatibility;
        callers that unpack two values must handle that case.
    """
    image_shape = np.array(np.shape(image)[0:2])

    # Add grey bars (letterbox) so the image is resized without distortion.
    crop_img = np.array(
        letterbox_image(
            image, (self.model_image_size[1], self.model_image_size[0])))
    photo = np.array(crop_img, dtype=np.float32) / 255.0  # scale to [0, 1]
    # HWC -> CHW, the layout the network consumes.
    photo = np.transpose(photo, (2, 0, 1))

    with torch.no_grad():
        # Wrapping in a list adds the batch dimension.
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        outputs = self.net(images)
        # One decoder per feature level (three levels).
        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]

        # Stack the predictions of all levels, then run NMS.
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)

    # If nothing was detected the entry for this image has no tensor to
    # convert; return the untouched image.  Only the errors that situation can
    # produce are caught, so genuine failures are no longer hidden.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, TypeError, IndexError):
        return image

    # Keep boxes whose objectness * class confidence exceeds the threshold.
    top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
    top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
    top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
    top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
    top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

    # The boxes are relative to the letterboxed image; remove the grey bars
    # so they refer to the original image.
    boxes = yolo_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([self.model_image_size[0], self.model_image_size[1]]),
        image_shape)

    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * np.shape(image)[1] +
                                            0.5).astype('int32'))
    # Outline thickness grows with the image size, but is at least 1 px.
    thickness = max((np.shape(image)[0] + np.shape(image)[1]) //
                    self.model_image_size[0], 1)

    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]
        colour = self.colors[self.class_names.index(predicted_class)]

        # Grow the box by 5 px on every side, then round and clamp to the image.
        top, left, bottom, right = boxes[i]
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5

        # top-left corner
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        # bottom-right corner
        bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        # The label is logged in its UTF-8 encoded form, as before.
        print(label.encode('utf-8'), top, left, bottom, right)

        # Put the label above the box when it fits, otherwise just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Nested 1-px rectangles emulate a thick outline.  A separate loop
        # variable keeps the detection index ``i`` intact.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset, right - offset, bottom - offset],
                outline=colour)
        # Filled background for the label text.
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colour)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw

    return image, boxes
def test_net(output_dir,
             net,
             cuda,
             dataset,
             score_min,
             nms_max,
             use_07_eval=True,
             iou_thres=(0.1, 0.5, 0.75)):
    """Run ``net`` over ``dataset``, dump the detections and evaluate them.

    Detections are collected as ``all_boxes[cls][image]``, an ``N x 5`` array
    of ``[x1, y1, x2, y2, score]`` in raw-image coordinates.  They are pickled
    to ``<output_dir>/detections.pkl``, written per class through
    ``write_voc_results_file`` and scored by ``do_python_eval``.

    Returns:
        ``(aps, mAP)`` exactly as returned by ``do_python_eval``.
    """
    num_images = len(dataset)
    num_classes = len(dataset.classes)

    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    for img_idx in range(num_images):
        _, raw_im, im = dataset.raw_transformed(img_idx)
        h, w, _ = raw_im.shape

        x = Variable(im.unsqueeze(0))  # 1, channels, height, width
        if cuda:
            x = x.cuda()

        _t['im_detect'].tic()
        detections = net(x)
        # Detection time, averaged (value not used further)
        detect_time = _t['im_detect'].toc(average=True)

        detections = non_max_suppression(detections,
                                         num_classes,
                                         score_thres=score_min,
                                         nms_thres=nms_max)

        image_dets = detections[0]
        if image_dets is None:
            # Nothing survived NMS for this image; every class stays empty.
            continue

        # Geometry to map boxes back to the raw image; identical for every
        # class of this image.
        scale_hw = max(h, w) / dataset.img_shape[0]
        diff = abs(h - w)
        half_diff = diff // 2
        # NOTE(review): the second corner is shifted by ``diff - half_diff``
        # while the first is shifted by ``half_diff``; for odd ``diff`` these
        # differ by one pixel -- confirm against the dataset's padding.
        if h <= w:
            pad = (0, half_diff, 0, diff - half_diff)
        else:
            pad = (half_diff, 0, diff - half_diff, 0)

        # No background cls
        for cls_idx in range(num_classes):
            # rows are (x1, y1, x2, y2, obj_conf, cls_conf, cls)
            dets = image_dets[image_dets[:, -1] == cls_idx]
            if dets.size(0) == 0:
                continue

            boxes = dets[:, :4]  # (n, 4)
            boxes *= scale_hw  # undo the resize
            boxes -= torch.Tensor(pad).type_as(boxes)  # undo the padding

            scores = dets[:, -2].cpu().numpy()
            cls_dets = np.hstack(
                (boxes.cpu().numpy(), scores[:, np.newaxis])).astype(
                    np.float32, copy=False)
            all_boxes[cls_idx][img_idx] = cls_dets  # (n, 5): location + score

    # store the predicted boxes and labels as .pkl
    with open(det_file, 'wb') as fdet:
        pickle.dump(all_boxes, fdet, pickle.HIGHEST_PROTOCOL)

    print('Write predictions to disk')
    # cls by cls, .txt
    write_voc_results_file(all_boxes, dataset, output_dir)

    print('Evaluating detections')
    aps, mAP = do_python_eval(dataset, output_dir, use_07_eval, iou_thres)
    return aps, mAP
# Fragment: build a YOLO model (soft or masked variant), wrap it in a sparse
# model, and print the NMS results of both side by side.  `img`, `is_CS`,
# `device`, `ck_model` and `ck_mask` are defined outside this excerpt.
if img.ndimension() == 3:
    img = img.unsqueeze(0)  # add a batch dimension
if is_CS:
    yolo = SoftDarknet(cfg='cfg/voc_yolov3_soft_orig-output.cfg').to(device)
    yolo.load_state_dict(ck_model['model'])
    yolo.ticket = True
    _ = yolo(img)  # forward pass whose result is discarded
else:
    yolo = Darknet(cfg='cfg/voc_yolov3.cfg').to(device)
    yolo.load_state_dict(ck_model['model'])
    # Rebuild the mask from its checkpoint and apply it to the weights.
    mask = create_mask_LTH(yolo)
    mask.load_state_dict(ck_mask)
    apply_mask_LTH(yolo, mask)
# NOTE(review): the original indentation is lost; the lines below are assumed
# to run for both branches -- confirm.
sparse = Ch_Wise_SparseYOLO(yolo).to(device)
yolo.eval()
sparse.eval()
# Inference
pred1 = yolo(img)[0]
pred2 = sparse(img)[0]
# Apply NMS
pred1 = non_max_suppression(pred1, 0.3, 0.6)
pred2 = non_max_suppression(pred2, 0.3, 0.6)
for i, (det1, det2) in enumerate(zip(pred1, pred2)):
    print(det1, det2)
    # Element-wise absolute difference; this requires both detections to be
    # tensors of compatible shape.
    print(torch.abs(det1 - det2))
def detect_image(self, image):
    """Detect objects in ``image`` with EfficientDet and draw them on it.

    The image is letterboxed to ``image_sizes[self.phi]``, preprocessed,
    run through ``self.net``, decoded with ``decodebox`` and filtered by
    non-maximum suppression (NMS threshold fixed at 0.2).  Surviving boxes
    are mapped back to the original image and drawn, with a
    ``"<class> <score>"`` label, directly onto ``image``.

    Args:
        image: image to annotate; it is passed to ``ImageDraw.Draw``, so a
            PIL image is expected.  It is modified in place.

    Returns:
        ``(image, predicted_class, score)`` where the last two describe the
        last box drawn.  ``(image, 0, 0)`` is returned when nothing was
        detected or no detection passed the confidence filter.
    """
    image_shape = np.array(np.shape(image)[0:2])

    crop_img = np.array(
        letterbox_image(image,
                        (image_sizes[self.phi], image_sizes[self.phi])))
    photo = np.array(crop_img, dtype=np.float32)
    # HWC -> CHW after the model-specific preprocessing.
    photo = np.transpose(preprocess_input(photo), (2, 0, 1))
    images = np.asarray([photo])  # add the batch dimension

    with torch.no_grad():
        images = torch.from_numpy(images)
        if self.cuda:
            images = images.cuda()

        _, regression, classification, anchors = self.net(images)
        regression = decodebox(regression, anchors, images)
        detection = torch.cat([regression, classification], axis=-1)
        batch_detections = non_max_suppression(detection,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.2)

    # No detections: the entry for this image has no tensor to convert.
    # Only the errors that situation can produce are caught.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, TypeError, IndexError):
        print('置信度过高,没有找到符合条件的目标')
        return image, 0, 0

    top_index = batch_detections[:, 4] > self.confidence
    top_conf = batch_detections[top_index, 4]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
    top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
    top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
    top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

    # Remove the grey bars: map boxes back onto the original image.
    boxes = efficientdet_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([image_sizes[self.phi], image_sizes[self.phi]]), image_shape)

    font = ImageFont.truetype(font='utils/simhei.ttf',
                              size=np.floor(3e-2 * np.shape(image)[1] +
                                            0.5).astype('int32'))
    # At least 1 px, otherwise small images would get no outline at all.
    thickness = max((np.shape(image)[0] + np.shape(image)[1]) //
                    image_sizes[self.phi], 1)

    # Defaults for the case where the confidence filter leaves no boxes;
    # they match the "no detection" return above.
    predicted_class, score = 0, 0

    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]
        colour = self.colors[self.class_names.index(predicted_class)]

        # Grow the box by 5 px on every side, then round and clamp to the image.
        top, left, bottom, right = boxes[i]
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5

        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)

        # Put the label above the box when it fits, otherwise just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Nested 1-px rectangles emulate a thick outline.  A separate loop
        # variable keeps the detection index ``i`` intact.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset, right - offset, bottom - offset],
                outline=colour)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colour)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw

    return image, predicted_class, score
def detect_image(self, image_id, image):
    """Write the detections for ``image`` to a per-image results file.

    Runs the EfficientDet pipeline (letterbox, preprocess, network,
    ``decodebox``, NMS) and writes one line per detection,
    ``"<class> <score> <left> <top> <right> <bottom>"``, to
    ``./input/detection-results/<image_id>.txt``.

    Side effects:
        Overwrites ``self.confidence`` with 0.01 and ``self.iou`` with 0.5.
        The results file is always created (and left empty when nothing is
        detected).

    Args:
        image_id: file stem of the results file.
        image: input image handed to ``letterbox_image``.

    Returns:
        None.
    """
    self.confidence = 0.01
    self.iou = 0.5

    # The context manager closes the file on every exit path, including the
    # early return below and any exception raised during inference.
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])

        # Add grey bars (letterbox) so the image is resized without distortion.
        crop_img = np.array(
            letterbox_image(image,
                            (image_sizes[self.phi], image_sizes[self.phi])))
        photo = np.array(crop_img, dtype=np.float32)
        photo = np.transpose(preprocess_input(photo), (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            # Forward pass.
            _, regression, classification, anchors = self.net(images)

            # Decode the regression output, then run NMS.
            regression = decodebox(regression, anchors, images)
            detection = torch.cat([regression, classification], axis=-1)
            batch_detections = non_max_suppression(detection,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)

        # Nothing detected: leave the (empty) results file and stop.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, TypeError, IndexError):
            return

        # Keep boxes scoring above the confidence threshold.
        top_index = batch_detections[:, 4] > self.confidence
        top_conf = batch_detections[top_index, 4]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Remove the grey bars: map boxes back onto the original image.
        boxes = efficientdet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([image_sizes[self.phi], image_sizes[self.phi]]),
            image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            # The score is truncated to its first six characters.
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)),
                     str(int(top)), str(int(right)), str(int(bottom))))
    return
def get_FPS(self, image, test_interval):
    """Measure the average time of one detection pass on ``image``.

    The image is preprocessed once.  One untimed warm-up pass is run, then
    ``test_interval`` timed passes, each consisting of the network forward
    pass, ``decodebox``, NMS and the box correction.

    Args:
        image: input image handed to ``letterbox_image``.
        test_interval: number of timed passes; must be non-zero because the
            elapsed time is divided by it.

    Returns:
        Average wall-clock seconds per pass.
    """
    image_shape = np.array(np.shape(image)[0:2])

    # Add grey bars (letterbox) so the image is resized without distortion.
    crop_img = np.array(
        letterbox_image(image,
                        (image_sizes[self.phi], image_sizes[self.phi])))
    photo = np.array(crop_img, dtype=np.float32)
    photo = np.transpose(preprocess_input(photo), (2, 0, 1))

    with torch.no_grad():
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

    def _detect_once():
        # One full pass: network, decoding, NMS and box post-processing.
        with torch.no_grad():
            _, regression, classification, anchors = self.net(images)
            regression = decodebox(regression, anchors, images)
            detection = torch.cat([regression, classification], axis=-1)
            batch_detections = non_max_suppression(detection,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)
            # Post-processing is best effort (it fails when there are no
            # detections), but interpreter-exit signals such as Ctrl-C must
            # still get through, hence ``Exception`` rather than a bare except.
            try:
                detections = batch_detections[0].cpu().numpy()
                top_index = detections[:, 4] > self.confidence
                top_conf = detections[top_index, 4]
                top_label = np.array(detections[top_index, -1], np.int32)
                top_bboxes = np.array(detections[top_index, :4])
                top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
                top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
                top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
                top_ymax = np.expand_dims(top_bboxes[:, 3], -1)
                efficientdet_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array([image_sizes[self.phi], image_sizes[self.phi]]),
                    image_shape)
            except Exception:
                pass

    # Untimed warm-up pass.
    _detect_once()

    t1 = time.time()
    for _ in range(test_interval):
        _detect_once()
    t2 = time.time()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def test(model, fetcher, conf_thres=1e-3, nms_thres=0.5, iou_thres=0.5):
    """Evaluate ``model`` on the batches of ``fetcher`` and return its mAP.

    For every batch the validation loss is accumulated, NMS is applied to the
    inference output and each prediction is marked correct when it matches a
    not-yet-matched target of the same class with IoU above ``iou_thres``.
    When ``torch.distributed`` is initialised, the per-process statistics are
    gathered from all processes before the metrics are computed.  Overall and
    per-class results are printed as
    ``class, images, targets, P, R, mAP, F1``.

    Args:
        model: network returning ``(inference_output, training_output)``.
        fetcher: iterable of ``(imgs, targets)`` batches exposing
            ``fetcher.loader.dataset.classes``.
        conf_thres: confidence threshold passed to NMS.
        nms_thres: IoU threshold passed to NMS.
        iou_thres: IoU a prediction must exceed to count as a true positive
            (default 0.5, the previously hard-coded value).

    Returns:
        Mean average precision over the classes present (0.0 when there are
        no statistics).
    """
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    seen = 0
    # Fallback values used by the summary line when no statistics exist.
    mp, mr, mAP, mf1 = 0., 0., 0., 0.
    stats, ap_class = [], []

    pbar = tqdm(enumerate(fetcher), total=len(fetcher))
    for idx, (imgs, targets) in pbar:
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Run model
        inf_out, train_out = model(imgs)  # inference and training outputs

        # Compute loss
        val_loss += compute_loss(train_out, targets, model).item()

        # Run NMS
        output = non_max_suppression(inf_out,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)

        # Plot images with bounding boxes
        if idx == 0:
            show_batch(imgs, output)

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                # No predictions: only record the image if it had targets.
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes, scaled from normalised to pixel units
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for pi, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):
                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # A target may be matched by one prediction only.
                    if iou > iou_thres and m[bi] not in detected:
                        correct[pi] = 1
                        detected.append(m[bi])

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct, pred[:, 4].cpu().numpy(),
                          pred[:, 6].cpu().numpy(), tcls))

        pbar.set_description('loss: %8g' % (val_loss / (idx + 1)))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]

    # sync stats across processes
    if dist.is_initialized():
        world_size = dist.get_world_size()
        for i in range(len(stats)):
            stat = torch.FloatTensor(stats[i]).to(device)

            # Exchange lengths first: all_gather needs equally sized tensors.
            ls = torch.IntTensor([len(stat)]).to(device)
            ls_list = [
                torch.IntTensor([0]).to(device) for _ in range(world_size)
            ]
            dist.all_gather(ls_list, ls)
            ls_list = [ls_item.item() for ls_item in ls_list]
            max_ls = max(ls_list)

            # Zero-pad to the longest length, gather, then trim the padding.
            if len(stat) < max_ls:
                stat = torch.cat(
                    [stat, torch.zeros(max_ls - len(stat)).to(device)])
            stat_list = [
                torch.zeros(max_ls).to(device) for _ in range(world_size)
            ]
            dist.all_gather(stat_list, stat)
            stat_list = [
                stat_list[rank][:ls_list[rank]]
                for rank in range(world_size)
                if ls_list[rank] > 0
            ]
            stat = torch.cat(stat_list)
            stats[i] = stat.cpu().numpy()

    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, mAP, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=num_classes)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, mAP, mf1))

    # Print results per class (``ap_class`` is empty when there are no stats)
    for i, c in enumerate(ap_class):
        print(pf % (classes[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    return mAP
def detect_image(self, image_id, image):
    """Write the YOLO detections for ``image`` to a per-image results file.

    The image is either letterboxed or plainly resized (depending on
    ``self.letterbox_image``) to ``self.model_image_size``, run through
    ``self.net``, decoded and filtered by NMS.  One line per detection,
    ``"<class> <score> <left> <top> <right> <bottom>"``, is written to
    ``./input/detection-results/<image_id>.txt``.

    Side effects:
        Overwrites ``self.confidence`` with 0.01 and ``self.iou`` with 0.5.
        The results file is always created (and left empty when nothing is
        detected).

    Args:
        image_id: file stem of the results file.
        image: input image; ``image.convert`` / ``resize`` are called on it
            in the non-letterbox path, so a PIL image is expected.

    Returns:
        None.
    """
    self.confidence = 0.01
    self.iou = 0.5

    # The context manager closes the file on every exit path, including the
    # early return below and any exception raised during inference.
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])

        # Either add grey bars for a distortion-free resize, or resize
        # directly.
        if self.letterbox_image:
            crop_img = np.array(
                letterbox_image(
                    image,
                    (self.model_image_size[1], self.model_image_size[0])))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize(
                (self.model_image_size[1], self.model_image_size[0]),
                Image.BICUBIC)
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))  # HWC -> CHW

        with torch.no_grad():
            # Wrapping in a list adds the batch dimension.
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            # Forward pass, one decoder per feature level.
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]

            # Stack the predictions of all levels, then run NMS.
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)

        # Nothing detected: leave the (empty) results file and stop.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, TypeError, IndexError):
            return

        # Keep boxes whose objectness * class confidence exceeds the threshold.
        top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        if self.letterbox_image:
            # Boxes are relative to the letterboxed image; strip the grey bars.
            boxes = yolo_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.model_image_size[0], self.model_image_size[1]]),
                image_shape)
        else:
            # Plain resize: rescale each axis back to the original size.
            top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
            top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
            top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
            top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
            boxes = np.concatenate([top_ymin, top_xmin, top_ymax, top_xmax],
                                   axis=-1)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            # The score is truncated to its first six characters.
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)),
                     str(int(top)), str(int(right)), str(int(bottom))))
    return
def stream(cfg,
           classes_file,
           weights,
           socket_ip,
           socket_port,
           image_size=128,
           confidence_threshold=0.6,
           nms_thres=0.5):
    """Detect objects on Kinect frames and send risky ones over a TCP socket.

    Loads a Darknet model on the CPU, connects to ``(socket_ip, socket_port)``
    and, for every frame from ``LoadKinect``, runs detection + NMS.  For each
    detection whose class is in ``RISKY_CLASSES`` a box is drawn on the frame
    and a ``"<class>;<phi>;<z>"`` record is built; the records are joined with
    ``|`` and sent over the socket.  Every frame is displayed with matplotlib.

    Side effects: assigns the module-level global ``sock``; prints progress.
    The function loops for as long as ``LoadKinect`` yields frames.

    NOTE(review): the original indentation was lost; the nesting below is the
    most plausible reconstruction and should be confirmed against the
    upstream file.
    """
    print('+ Initializing model')
    model = Darknet(cfg, image_size)
    print('+ Loading model')
    load_darknet_weights(model, weights)
    print('+ Fusing model')
    model.fuse()
    print('+ Loading model to CPU')
    model.to('cpu').eval()
    print('+ Loading webcam')
    cap = LoadKinect(img_size=image_size)
    print('+ Loading classes')
    classes = load_classes(classes_file)
    # One random RGB colour per class for the drawn boxes.
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(classes))]
    print('+ Connecting to remote socket')
    global sock
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((socket_ip, socket_port))
    print('+ Enumerating cam')
    # NOTE(review): `counter` is reused by the inner loops below, so it does
    # not reliably hold the frame index inside the loop body.
    for counter, (path, img, im0, vid_cap) in enumerate(cap):
        t = time.time()
        print('+ Loading image to CPU')
        img = torch.from_numpy(img).unsqueeze(0).to('cpu')
        pred, _ = model(img)
        print('+ Detecting objects')
        det = non_max_suppression(pred, confidence_threshold, nms_thres)[0]
        if det is not None and len(det) > 0:
            detected_classes = []
            print('+ Rescaling model')
            # Map boxes from the network input size back to the frame size.
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0.shape).round()
            print('+ Reading depth')
            depth = get_depth()
            depth_swap = np.swapaxes(depth, 0, 1)
            # Per stripe of the swapped depth map: the value at index 100 of
            # the sorted stripe.
            depth_strip1d = np.array([
                np.sort(stripe)[100] for stripe in depth_swap
            ]).astype(np.uint8)
            # Broadcast each stripe value along the other axis.  The
            # comprehension variable `depth` is local to the comprehension.
            depth_strip2d_swap = np.array([
                np.ones(depth_swap.shape[1]) * depth
                for depth in depth_strip1d
            ]).astype(np.uint8)
            depth_strip2d = np.swapaxes(depth_strip2d_swap, 0, 1)
            depth_edge1d = np.zeros(depth_strip1d.shape)
            # First pass: toggle `state` on stripes below 230 and store its
            # negation.  NOTE(review): `state` can never stay True for two
            # consecutive stripes because of the `not state` term.
            state = False
            for counter, _ in np.ndenumerate(depth_edge1d[:-1]):
                state = True if not state and depth_strip1d[
                    counter] < 230 else False
                depth_edge1d[counter[0]] = not state
            # Second pass: looks like a filter for runs shorter than 10.
            state = False
            state_cnt = 0
            for counter, _ in np.ndenumerate(depth_edge1d[:-1]):
                counter = counter[0]
                if depth_edge1d[counter] == state:
                    state_cnt += 1
                else:
                    if state_cnt < 10:
                        # NOTE(review): the loop variable `r` is unused; every
                        # iteration writes index `counter`.  Possibly
                        # `depth_edge1d[r]` was intended -- confirm.
                        for r in range(max(0, counter - 10), counter):
                            depth_edge1d[counter] = state
                    state_cnt = 0
                    state = depth_edge1d[counter]
            depth_edge1d = depth_edge1d * 255
            depth_edge2d_swap = np.array([
                np.ones(100) * awddawd for awddawd in depth_edge1d
            ]).astype(np.uint8)
            # NOTE(review): `depth_strip2d` and `depth_edge2d` are not used
            # again in this function.
            depth_edge2d = np.swapaxes(depth_edge2d_swap, 0, 1)
            for *coordinates, conf, cls_conf, cls in det:
                if classes[int(cls)] in RISKY_CLASSES:
                    label = '%s %.2f' % (classes[int(cls)], conf)
                    plot_one_box(coordinates,
                                 im0,
                                 label=label,
                                 color=colors[int(cls)])
                    print(f"+ Detected {classes[int(cls)]}")
                    # Mean depth over 10 rows around the first and second box
                    # coordinate.  NOTE(review): both slices index the first
                    # axis of `depth`, and the bounds are values taken from
                    # `det` (floats after .round()) -- confirm this is intended
                    # and that the slice accepts them.
                    x_avg_depth = np.mean(depth[coordinates[0] -
                                                5:coordinates[0] + 5])
                    y_avg_depth = np.mean(depth[coordinates[1] -
                                                5:coordinates[1] + 5])
                    detected_classes.append({
                        classes[int(cls)]: {
                            'x': coordinates[0],
                            'y': coordinates[1],
                            'z': np.average(np.array([x_avg_depth, y_avg_depth]))
                        }
                    })
            n = []
            for counter in detected_classes:
                width = im0.shape[1]
                # Each entry is a single-key dict {class_name: {x, y, z}}.
                x, y, z = counter[list(counter.keys())[0]].values()
                # Horizontal position mapped from [0, width] to
                # [-CAMERA_FOV / 2, CAMERA_FOV / 2].
                phi = (x / width * 2 - 1) * (CAMERA_FOV / 2)
                n.append(f"{list(counter.keys())[0]};{phi};{z}|")
            # [:-1] strips the trailing '|' separator.
            # NOTE(review): socket.send may transmit only part of the payload.
            sock.send(''.join(str(x) for x in n)[:-1].encode('utf-8'))
        print('+ Cycle took %.3fs' % (time.time() - t))
        plt.imshow(bgr_to_rgb(im0))
        plt.show(block=False)
        plt.pause(.001)
def test(
        model,
        dataloader,
        iou_thres=0.5,
        conf_thres=0.3,
        nms_thres=0.45,
        print_interval=40,
):
    """Evaluate a single-class detector and return running mAP / recall / precision.

    For every batch the model output (dicts with ``'boxes'``, ``'scores'`` and
    ``'labels'``) is converted to one tensor per image, filtered with
    ``non_max_suppression`` and matched against the ground-truth boxes by IoU.
    Per-image AP, recall and precision come from ``ap_per_class``.

    Args:
        model: Callable detector; it is fed ``imgs.cuda()``.
        dataloader: Yields ``(imgs, targets, paths, shapes, targets_len)``;
            ``dataloader.dataset.nF`` is printed as the total image count.
        iou_thres: A prediction is correct when its best IoU exceeds this.
        conf_thres: Confidence threshold forwarded to ``non_max_suppression``.
        nms_thres: NMS threshold forwarded to ``non_max_suppression``.
        print_interval: Print running statistics every this many batches.

    Returns:
        Tuple ``(mean_mAP, mean_R, mean_P)``.  Each value is the sum of the
        per-image scores divided by ``AP_accum_count + 1e-16`` (an array of
        length ``nC``), or the float ``0.0`` if no image was ever scored.
    """
    nC = 1  # number of classes; every prediction/target is treated as class 0
    mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    mAPs, mR, mP = [], [], []
    AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
    for batch_i, (imgs, targets, paths, shapes, targets_len) in enumerate(dataloader):
        t = time.time()
        # Evaluation only: skip autograd bookkeeping during the forward pass.
        with torch.no_grad():
            out = model(imgs.cuda())
        output = []
        for o in out:
            boxes = xyxy2xywh(o['boxes']).cpu()
            scores = o['scores'].cpu().view(-1, 1)
            labels = o['labels'].cpu().view(-1, 1).float()
            # The score column is duplicated so each row is
            # (box[4], score, score, label).
            output.append(torch.Tensor(
                torch.cat((boxes, scores, scores, labels), dim=1)))
        output = non_max_suppression(output, conf_thres=conf_thres,
                                     nms_thres=nms_thres)
        for i, o in enumerate(output):
            if o is not None:
                output[i] = o[:, :6]

        # Trim every target tensor to its real number of labels.
        targets = [targets[i][:int(l)] for i, l in enumerate(targets_len)]
        for labels, detections in zip(targets, output):
            seen += 1

            if detections is None:
                # Labels but no detections: count the image as zero AP.
                if labels.size(0) != 0:
                    mAPs.append(0)
                    mR.append(0)
                    mP.append(0)
                continue

            # Sort detections by decreasing confidence.
            detections = detections.cpu().numpy()
            detections = detections[np.argsort(-detections[:, 4])]

            if labels.size(0) == 0:
                # Detections but no labels: count the image as zero AP.
                mAPs.append(0)
                mR.append(0)
                mP.append(0)
                continue

            correct = []
            target_cls = torch.zeros_like(labels[:, 0])
            target_boxes = labels[:, 2:6]
            detected = []
            for *pred_bbox, _conf, _obj_conf in detections:
                pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                # IoU of this prediction with every target box.
                iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0]
                best_i = np.argmax(iou)
                # Correct when the overlap is large enough and the target
                # has not already been claimed by an earlier prediction.
                if iou[best_i] > iou_thres and best_i not in detected:
                    correct.append(1)
                    detected.append(best_i)
                else:
                    correct.append(0)

            # Average precision per class (all predictions are class 0).
            AP, AP_class, R, P = ap_per_class(
                tp=correct,
                conf=detections[:, 4],
                pred_cls=np.zeros_like(detections[:, 5]),
                target_cls=target_cls)

            AP_accum_count += np.bincount(AP_class, minlength=nC)
            AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)

            mAPs.append(AP.mean())
            mR.append(R.mean())
            mP.append(P.mean())

            # Running means over all images seen so far.
            mean_mAP = np.sum(mAPs) / (AP_accum_count + 1E-16)
            mean_R = np.sum(mR) / (AP_accum_count + 1E-16)
            mean_P = np.sum(mP) / (AP_accum_count + 1E-16)

        if batch_i % print_interval == 0:
            print(('%11s%11s' + '%11.3g' * 4 + 's') %
                  (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP,
                   time.time() - t))

    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16)))

    return mean_mAP, mean_R, mean_P
def detect_image(self, image):
    """Run the detector on one image and return the detections as a list.

    Args:
        image: Input image; ``np.shape(image)[0:2]`` is read as
            (height, width) and the image is passed to ``letterbox_image``.

    Returns:
        A list of ``[left, top, right, bottom, score, class_name]`` entries in
        original-image pixel coordinates, each box grown by 5 px per side and
        clipped to the image.  An empty list is returned when the first NMS
        result cannot be converted to an array (e.g. it is ``None``).
    """
    img_h, img_w = np.shape(image)[0:2]
    image_shape = np.array([img_h, img_w])
    net_size = (self.model_image_size[0], self.model_image_size[1])

    # Letterbox to the network input size, scale to [0, 1] and go to CHW.
    crop_img = np.array(letterbox_image(image, net_size))
    photo = np.array(crop_img, dtype=np.float32)
    photo /= 255.0
    photo = np.transpose(photo, (2, 0, 1)).astype(np.float32)
    images = np.asarray([photo])  # add the batch dimension

    with torch.no_grad():
        images = torch.from_numpy(images)
        if self.cuda:
            images = images.cuda()
        outputs = self.net(images)

    # Decode each of the three output heads and merge them.
    output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
    output = torch.cat(output_list, 1)
    batch_detections = non_max_suppression(output, len(self.class_names),
                                           conf_thres=self.confidence,
                                           nms_thres=0.3)
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        # Nothing usable came back from NMS.  Only these errors are treated
        # as "no detections"; anything else should surface to the caller.
        return []

    # Score = column 4 * column 5; keep rows above the threshold.
    scores = batch_detections[:, 4] * batch_detections[:, 5]
    top_index = scores > self.confidence
    top_conf = scores[top_index]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
    top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
    top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
    top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

    # Map boxes from the letterboxed input back onto the original image.
    boxes = yolo_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([self.model_image_size[0], self.model_image_size[1]]),
        image_shape)

    results = []
    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]
        top, left, bottom, right = boxes[i]

        # Grow by 5 px per side, round to nearest int, clip to the image.
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

        results.append([left, top, right, bottom, score, predicted_class])
    return results
def test(cfg,
         data,
         weights=None,
         batch_size=16,
         img_size=608,
         iou_thres=0.5,
         conf_thres=0.001,
         nms_thres=0.5,
         save_json=True,
         hyp=None,
         model=None,
         single_cls=False):
    """Evaluate a detection model on the validation set.

    :param str cfg: model cfg file (used only when ``model`` is None)
    :param dict data: data dict; reads ``'valid'``, ``'num_classes'`` and
        ``'names'``
    :param str weights: weights path; ``.pt`` files are loaded as PyTorch
        checkpoints, anything else as darknet weights.  ``None`` or ``''``
        loads nothing.  Used only when ``model`` is None.
    :param int batch_size: batch size
    :param int img_size: image size
    :param float iou_thres: iou threshold for counting a prediction as correct
    :param float conf_thres: confidence threshold passed to NMS
    :param float nms_thres: nms threshold
    :param bool save_json: whether to collect predictions, write them to
        ``results.json`` and run the pycocotools evaluation
    :param hyp: hyperparameters forwarded to ``LoadImagesAndLabels``
    :param model: an already built model; when None one is created from
        ``cfg`` and ``weights``
    :param bool single_cls: only one class
    :return: ``((mean_pre, mean_rec, mean_ap, mf1, *mean_losses), maps)``
        where ``maps`` holds one AP value per class
    """
    if model is None:
        device = select_device(opt.device)
        verbose = False

        # Initialize model
        model = Model(cfg, img_size).to(device)

        # Load weights.  The default ``weights=None`` used to crash on
        # ``weights.endswith``; it is now treated like the empty string.
        if weights:
            if weights.endswith('.pt'):
                checkpoint = torch.load(weights, map_location=device)
                state_dict = intersect_dicts(checkpoint['model'],
                                             model.state_dict())
                model.load_state_dict(state_dict, strict=False)
            else:
                load_darknet_weights(model, weights)
            print(f'Loaded weights from {weights}!')

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device
        verbose = False

    test_path = data['valid']
    num_classes, names = (1, ['item']) if single_cls else (int(
        data['num_classes']), data['names'])

    # Dataloader
    dataset = LoadImagesAndLabels(test_path, img_size, batch_size, hyp=hyp)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=8,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    output_format = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets',
                                             'Pre', 'Rec', 'mAP', 'F1')
    precision, recall, f_1, mean_pre, mean_rec, mean_ap, mf1 = 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    json_dict, stats, aver_pre, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths, shapes) in enumerate(
            tqdm(dataloader, desc=output_format)):
        targets = targets.to(device)
        imgs = imgs.to(device) / 255.0
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Plot images with bounding boxes
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs,
                        targets=targets,
                        paths=paths,
                        fname='test_batch0.jpg')

        with torch.no_grad():
            inference_output, train_output = model(imgs)

            if hasattr(model, 'hyp'):  # if model has loss hyperparameters
                loss += compute_loss(train_output, targets,
                                     model)[1][:3].cpu()  # GIoU, obj, cls

            output = non_max_suppression(inference_output,
                                         conf_thres=conf_thres,
                                         nms_thres=nms_thres)

        # Statistics per image
        for i, pred in enumerate(output):
            labels = targets[targets[:, 0] == i, 1:]
            num_labels = len(labels)
            target_class = labels[:, 0].tolist() if num_labels else []
            seen += 1

            if pred is None:
                if num_labels:
                    stats.append(
                        ([], torch.Tensor(), torch.Tensor(), target_class))
                continue

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[i]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[i].shape[1:], box,
                             shapes[i][0])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for det_i, det in enumerate(pred):
                    json_dict.append({
                        'image_id': image_id,
                        'category_id': coco91class[int(det[6])],
                        'bbox':
                        [float(format(x, '.%gf' % 3)) for x in box[det_i]],
                        'score': float(format(det[4], '.%gf' % 5))
                    })

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if num_labels:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes, scaled from normalised to pixel coordinates
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for j, (*pbox, _, _, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == num_labels:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in target_class:
                        continue

                    # Best iou, index between pred and targets
                    mask = (pcls == tcls_tensor).nonzero(
                        as_tuple=False).view(-1)
                    iou, best_iou = bbox_iou(pbox, tbox[mask]).max(0)

                    # If iou > threshold and the target is still unclaimed,
                    # mark the prediction as correct
                    if iou > iou_thres and mask[best_iou] not in detected:
                        correct[j] = 1
                        detected.append(mask[best_iou])

            # Append statistics (correct, conf, pcls, target_class)
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), target_class))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]
    if len(stats):
        precision, recall, aver_pre, f_1, ap_class = ap_per_class(*stats)
        mean_pre, mean_rec, mean_ap, mf1 = precision.mean(), recall.mean(
        ), aver_pre.mean(), f_1.mean()
        num_targets = np.bincount(
            stats[3].astype(np.int64),
            minlength=num_classes)  # number of targets per class
    else:
        num_targets = torch.zeros(1)

    # Print results
    print_format = '%20s' + '%10.3g' * 6
    print(print_format %
          ('all', seen, num_targets.sum(), mean_pre, mean_rec, mean_ap, mf1))

    # Print results per class
    if verbose and num_classes > 1 and stats:
        for i, class_ in enumerate(ap_class):
            print(print_format %
                  (names[class_], seen, num_targets[class_], precision[i],
                   recall[i], aver_pre[i], f_1[i]))

    # Save JSON
    if save_json and mean_ap and json_dict:
        try:
            img_ids = [
                int(Path(x).stem.split('_')[-1]) for x in dataset.img_files
            ]
            with open('results.json', 'w') as file:
                json.dump(json_dict, file)

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocogt = COCO('data/coco/annotations/instances_val2017.json'
                          )  # initialize COCO ground truth api
            cocodt = cocogt.loadRes('results.json')  # initialize COCO pred api

            cocoeval = COCOeval(cocogt, cocodt, 'bbox')
            cocoeval.params.imgIds = img_ids  # only evaluate these images
            cocoeval.evaluate()
            cocoeval.accumulate()
            cocoeval.summarize()
            mean_ap = cocoeval.stats[1]  # update mAP to pycocotools mAP
        except ImportError:
            print(
                'WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.'
            )

    # Return results
    maps = np.zeros(num_classes) + mean_ap
    for i, class_ in enumerate(ap_class):
        maps[class_] = aver_pre[i]
    return (mean_pre, mean_rec, mean_ap, mf1,
            *(loss / len(dataloader)).tolist()), maps
def generate_box(self, image):
    """Detect objects in ``image`` and return their boxes as strings.

    Args:
        image: Input image; ``np.shape(image)[0:2]`` is read as
            (height, width) and the image is passed to ``letterbox_image``.

    Returns:
        A list of ``"left,top,right,bottom"`` strings in original-image pixel
        coordinates, each box grown by 5 px per side and clipped to the
        image.  An empty list is returned when the first NMS result cannot be
        converted to an array (e.g. it is ``None``).
    """
    img_h, img_w = np.shape(image)[0:2]
    image_shape = np.array([img_h, img_w])
    net_size = (self.model_image_size[0], self.model_image_size[1])

    # Letterbox to the network input size, scale to [0, 1] and go to CHW.
    crop_img = np.array(letterbox_image(image, net_size))
    photo = np.array(crop_img, dtype=np.float32)
    photo /= 255.0
    photo = np.transpose(photo, (2, 0, 1)).astype(np.float32)
    images = torch.from_numpy(np.asarray([photo]))  # add the batch dimension
    if self.cuda:
        images = images.cuda()

    with torch.no_grad():
        outputs = self.net(images)

    # Decode each of the three output heads and merge them.
    output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
    output = torch.cat(output_list, 1)
    batch_detections = non_max_suppression(output,
                                           self.config["yolo"]["classes"],
                                           conf_thres=self.confidence,
                                           nms_thres=0.3)
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        # Nothing usable came back from NMS.  Only these errors are treated
        # as "no detections"; anything else should surface to the caller.
        return []

    # Score = column 4 * column 5; keep rows above the threshold.
    top_index = (batch_detections[:, 4] * batch_detections[:, 5]
                 > self.confidence)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
    top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
    top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
    top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

    # Remove the grey letterbox bars: map boxes back to the original image.
    boxes = yolo_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([self.model_image_size[0], self.model_image_size[1]]),
        image_shape)

    # The font and line thickness formerly computed here were never used;
    # dropping them avoids a needless font-file read on every call.
    boxlist = []
    for top, left, bottom, right in boxes:
        # Grow by 5 px per side, round to nearest int, clip to the image.
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))
        boxlist.append(','.join(str(v) for v in (left, top, right, bottom)))
    return boxlist
def get_FPS(self, image, test_interval):
    """Measure the average time of one full detection pass on ``image``.

    The image is preprocessed once.  One untimed warm-up pass is run, then
    ``test_interval`` timed passes, each doing network inference, decoding,
    NMS and box post-processing.

    Args:
        image: Input image; letterboxed when ``self.letterbox_image`` is
            truthy, otherwise converted to RGB and resized with bicubic
            interpolation.
        test_interval: Number of timed passes.

    Returns:
        Average wall-clock seconds per pass.

    Raises:
        ZeroDivisionError: If ``test_interval`` is 0.
    """
    image_shape = np.array(np.shape(image)[0:2])
    size_0, size_1 = self.model_image_size[0], self.model_image_size[1]

    if self.letterbox_image:
        crop_img = np.array(letterbox_image(image, (size_1, size_0)))
    else:
        crop_img = image.convert('RGB')
        crop_img = crop_img.resize((size_1, size_0), Image.BICUBIC)
    photo = np.array(crop_img, dtype=np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))

    images = torch.from_numpy(np.asarray([photo]))  # add the batch dimension
    if self.cuda:
        images = images.cuda()

    def _run_once():
        """Do one inference + post-processing pass; results are discarded."""
        with torch.no_grad():
            outputs = self.net(images)
            # This variant decodes two output heads.
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(2)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence, nms_thres=self.iou)
        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            # Nothing usable came back from NMS; there is nothing left to
            # post-process for this pass.
            return

        # The values below are unused on purpose: they are computed only so
        # the timing covers the same work as a real detection call.
        scores = detections[:, 4] * detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(detections[top_index, -1], np.int32)
        top_bboxes = np.array(detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        if self.letterbox_image:
            boxes = yolo_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([size_0, size_1]), image_shape)
        else:
            # Plain resize: rescale from network input to image coordinates.
            top_xmin = top_xmin / size_1 * image_shape[1]
            top_ymin = top_ymin / size_0 * image_shape[0]
            top_xmax = top_xmax / size_1 * image_shape[1]
            top_ymax = top_ymax / size_0 * image_shape[0]
            boxes = np.concatenate(
                [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

    _run_once()  # warm-up pass, not timed

    t1 = time.time()
    for _ in range(test_interval):
        _run_once()
    t2 = time.time()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def detect_image(self, image):
    """Detect objects in ``image`` and draw labelled boxes onto it.

    Args:
        image: Input image; it is letterboxed for the network and then drawn
            on in place through ``ImageDraw.Draw``.

    Returns:
        The same ``image`` object with one rectangle and one text label per
        kept detection.  It is returned undrawn when the first NMS result
        cannot be converted to an array (no object detected).
    """
    img_h, img_w = np.shape(image)[0:2]
    image_shape = np.array([img_h, img_w])

    # Add grey bars so the image is resized without distortion.
    crop_img = np.array(
        letterbox_image(image, [self.input_shape[1], self.input_shape[0]]))
    photo = np.array(crop_img, dtype=np.float32)
    photo = np.transpose(preprocess_input(photo), (2, 0, 1))

    with torch.no_grad():
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        # Run the network.
        _, regression, classification, anchors = self.net(images)

        # Decode the predictions and run NMS.
        regression = decodebox(regression, anchors, images)
        detection = torch.cat([regression, classification], axis=-1)
        batch_detections = non_max_suppression(detection,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)

    # If no object was detected, return the original image.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        # Only these errors are treated as "no detections"; anything else
        # should surface to the caller.
        return image

    # Keep the boxes whose score (column 4) exceeds the confidence threshold.
    top_index = batch_detections[:, 4] > self.confidence
    top_conf = batch_detections[top_index, 4]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
    top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
    top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
    top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

    # Remove the grey bars: map boxes back onto the original image.
    boxes = retinanet_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([self.input_shape[0], self.input_shape[1]]), image_shape)

    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * img_w +
                                            0.5).astype('int32'))
    thickness = max((img_h + img_w) // self.input_shape[0], 1)

    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]
        color = self.colors[self.class_names.index(predicted_class)]

        # Grow by 5 px per side, round to nearest int, clip to the image.
        top, left, bottom, right = boxes[i]
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

        # Draw the box and its label.
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        # NOTE(review): ImageDraw.textsize is not available in recent Pillow
        # releases - confirm the pinned Pillow version before upgrading.
        label_size = draw.textsize(label, font)
        # The label is printed in its UTF-8 encoded (bytes) form.
        print(label.encode('utf-8'), top, left, bottom, right)

        # Put the label above the box when it fits, otherwise just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Nested 1-px rectangles give an outline ``thickness`` px wide.
        for offset in range(thickness):
            draw.rectangle([left + offset, top + offset,
                            right - offset, bottom - offset],
                           outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw
    return image