def detect_image(self, image_id, image):
    """Run SSD on one image and dump the detections to a text file.

    Results are written to ``./input/detection-results/<image_id>.txt``, one
    line per box: ``class score left top right bottom``. The file is always
    (re)created, so an image without detections produces an empty file.

    Args:
        image_id: Stem of the result file name.
        image: Image to analyse; it is handed to ``letterbox_image`` (defined
            elsewhere), so presumably a PIL image -- confirm.

    Returns:
        ``image`` unchanged when no box reaches the threshold, otherwise
        ``None`` (both kept from the original behaviour).
    """
    # Deliberately low threshold. NOTE: this overwrites the instance setting.
    self.confidence = 0.05
    result_path = "./input/detection-results/" + image_id + ".txt"
    image_shape = np.array(np.shape(image)[0:2])
    model_size = (self.model_image_size[0], self.model_image_size[1])

    # Resize with padding so the aspect ratio is preserved.
    crop_img = np.array(letterbox_image(image, model_size))

    # Subtract MEANS, reorder HWC -> CHW and add the batch axis.
    with torch.no_grad():
        photo = torch.from_numpy(
            np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)), 0)
        ).type(torch.FloatTensor)
        if self.cuda:
            photo = photo.cuda()
        preds = self.net(photo)

    # Tensor.numpy() only works on CPU tensors, so move the output once.
    preds = preds.detach().cpu()
    top_k = preds.size(2)

    # preds is indexed [batch, class, rank, 5]: slot 0 is the score and
    # slots 1..4 are xmin, ymin, xmax, ymax.
    top_conf, top_label, top_bboxes = [], [], []
    for i in range(preds.size(1)):
        j = 0
        # Bounded by top_k: without the bound the scan runs off the end of
        # the tensor when every slot of a class passes the threshold.
        while j < top_k and preds[0, i, j, 0] >= self.confidence:
            top_conf.append(preds[0, i, j, 0].item())
            # i - 1: presumably class index 0 is the background -- confirm.
            top_label.append(self.class_names[i - 1])
            top_bboxes.append(preds[0, i, j, 1:5].numpy())
            j += 1

    lines = []
    if top_conf:
        top_bboxes = np.array(top_bboxes)
        # Undo the letterbox padding; rows come back as top, left, bottom,
        # right.
        boxes = ssd_correct_boxes(
            top_bboxes[:, 1:2], top_bboxes[:, 0:1],
            top_bboxes[:, 3:4], top_bboxes[:, 2:3],
            np.array(model_size), image_shape)
        for predicted_class, score, box in zip(top_label, top_conf, boxes):
            top, left, bottom, right = box
            lines.append("%s %s %s %s %s %s\n" % (
                predicted_class, str(float(score))[:6],
                str(int(left)), str(int(top)),
                str(int(right)), str(int(bottom))))

    # The context manager closes the file on every path, including the
    # "no detections" one that used to leak the handle.
    with open(result_path, "w") as f:
        f.writelines(lines)

    if not lines:
        return image
    return
def get_FPS(self, image, test_interval):
    """Return the average time in seconds of one RetinaFace detection pass.

    One untimed pass (including preprocessing) is run first; afterwards the
    network forward pass plus decoding, NMS and box rescaling are repeated
    ``test_interval`` times on the same input tensor and timed.

    Args:
        image: Input image, converted to a float32 array of shape (h, w, c).
        test_interval: Number of timed repetitions.

    Returns:
        Elapsed wall-clock time divided by ``test_interval``.
    """
    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    # (width, height) pairs used to scale boxes (2 points) and landmarks
    # (5 points) back to pixel coordinates of the input image.
    box_scale = [im_width, im_height] * 2
    landmark_scale = [im_width, im_height] * 5

    if self.letterbox_image:
        image = np.array(
            letterbox_image(image,
                            [self.input_shape[1], self.input_shape[0]]),
            np.float32)
    else:
        # Without letterboxing the anchors are rebuilt for this image size.
        self.anchors = Anchors(
            self.cfg, image_size=(im_height, im_width)).get_anchors()

    def _forward_and_decode(batch):
        """Run the network once and post-process its output."""
        loc, conf, landms = self.net(batch)

        boxes = decode(loc.data.squeeze(0), self.anchors,
                       self.cfg['variance'])
        boxes = boxes.cpu().numpy()

        # Column 1 of the class scores is kept as the confidence.
        scores = conf.data.squeeze(0)[:, 1:2].cpu().numpy()

        points = decode_landm(landms.data.squeeze(0), self.anchors,
                              self.cfg['variance'])
        points = points.cpu().numpy()

        detections = np.concatenate([boxes, scores, points], -1)
        detections = non_max_suppression(detections, self.confidence)

        if len(detections) > 0:
            if self.letterbox_image:
                # Remove the padding introduced by letterbox_image.
                detections = retinaface_correct_boxes(
                    detections,
                    np.array([self.input_shape[0], self.input_shape[1]]),
                    np.array([im_height, im_width]))
            detections[:, :4] = detections[:, :4] * box_scale
            detections[:, 5:] = detections[:, 5:] * landmark_scale
        return detections

    # Untimed first pass: preprocessing plus one full detection.
    with torch.no_grad():
        image = torch.from_numpy(
            preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)
        if self.cuda:
            self.anchors = self.anchors.cuda()
            image = image.cuda()
        _forward_and_decode(image)

    t1 = time.time()
    for _ in range(test_interval):
        with torch.no_grad():
            _forward_and_decode(image)
    t2 = time.time()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def detect_image(self, image):
    """Detect objects with SSD and draw labelled boxes onto ``image``.

    Args:
        image: Image to analyse and draw on; it is passed to
            ``letterbox_image`` and ``ImageDraw.Draw``, so presumably a PIL
            image -- confirm.

    Returns:
        The same ``image`` object, with one rectangle and one text label per
        detection (unchanged when nothing reaches ``self.confidence``).
    """
    image_shape = np.array(np.shape(image)[0:2])
    model_size = (self.model_image_size[0], self.model_image_size[1])

    # Resize with padding so the aspect ratio is preserved.
    crop_img = np.array(letterbox_image(image, model_size))

    # Subtract MEANS, reorder HWC -> CHW and add the batch axis. The original
    # chained .cuda().type(torch.FloatTensor), which ends up as a CPU float
    # tensor anyway; the GPU round trip is dropped, the tensor is the same.
    with torch.no_grad():
        photo = torch.from_numpy(
            np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)), 0)
        ).type(torch.FloatTensor)
        preds = self.net(photo)

    # Tensor.numpy() only works on CPU tensors, so move the output once.
    preds = preds.detach().cpu()
    top_k = preds.size(2)

    # preds is indexed [batch, class, rank, 5]: slot 0 is the score and
    # slots 1..4 are xmin, ymin, xmax, ymax.
    top_conf, top_label, top_bboxes = [], [], []
    for i in range(preds.size(1)):
        j = 0
        # Bounded by top_k: without the bound the scan runs off the end of
        # the tensor when every slot of a class passes the threshold.
        while j < top_k and preds[0, i, j, 0] >= self.confidence:
            top_conf.append(preds[0, i, j, 0].item())
            # i - 1: presumably class index 0 is the background -- confirm.
            top_label.append(self.class_names[i - 1])
            top_bboxes.append(preds[0, i, j, 1:5].numpy())
            j += 1

    if not top_conf:
        return image

    top_bboxes = np.array(top_bboxes)
    # Undo the letterbox padding; rows come back as top, left, bottom, right.
    boxes = ssd_correct_boxes(
        top_bboxes[:, 1:2], top_bboxes[:, 0:1],
        top_bboxes[:, 3:4], top_bboxes[:, 2:3],
        np.array(model_size), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
    # At least one pixel, otherwise small images get no outline at all.
    thickness = max(
        (np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0],
        1)

    draw = ImageDraw.Draw(image)
    for predicted_class, score, box in zip(top_label, top_conf, boxes):
        top, left, bottom, right = box

        # Grow the box by 5 px on each side, round and clamp to the image.
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0],
                     np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1],
                    np.floor(right + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        # NOTE(review): ImageDraw.textsize was removed in Pillow 10; confirm
        # the Pillow version this project pins.
        label_size = draw.textsize(label, font)
        print(label.encode('utf-8'))

        # Put the label above the box when it fits, otherwise just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        color = self.colors[self.class_names.index(predicted_class)]
        # Nested 1 px rectangles emulate a thick outline.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset,
                 right - offset, bottom - offset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw
    return image
# criterion = MultiBoxLoss(3, 0.5, True, 0, True, 3, 0.5, False, True)

# Unwrap the network held by the detector (``.module`` suggests a
# DataParallel-style wrapper -- confirm).
model = ssd.net.module

imgPath = '1.bmp'
image = Image.open(imgPath)
image.show()
image_size = image.size
image = image.convert('RGB')

# (height, width) of the source image.
image_shape = np.array(np.shape(image)[0:2])

# Letterbox to the 300x300 network input, subtract MEANS, move the colour
# channel to the front and add the batch axis. Gradients are enabled on the
# input because the visualisation below back-propagates to it.
crop_img = np.array(letterbox_image(image, (300, 300)))
photo = (
    torch.from_numpy(
        np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)), 0))
    .type(torch.FloatTensor)
    .requires_grad_(True)
    .cuda()
)

# preds = model(photo) would have shape torch.Size([1, 3, 200, 5]).
guided_bp = Guided_backprop(model)
result = guided_bp.visualize(photo, None)
result = normalize(result)
plt.imshow(result)
def detect_image(self, image):
    """Detect boxes and draw them, with their scores, on a copy of ``image``.

    This variant uses a network that returns only ``loc`` and ``conf``.
    Candidates are kept when their score is at least 0.15 (no NMS is
    applied), and the array shapes before and after filtering are printed.

    Args:
        image: Input image; it must support ``.copy()`` and is drawn on with
            OpenCV, so presumably a numpy (h, w, c) array -- confirm.

    Returns:
        The annotated copy, or the untouched copy when nothing passes the
        threshold.
    """
    # Keep a copy for drawing; ``image`` itself becomes the network input.
    old_image = image.copy()
    image = np.array(image, np.float32)

    im_height, im_width, _ = np.shape(image)
    # Scales box corners (x1, y1, x2, y2) back to pixel coordinates.
    scale = [im_width, im_height, im_width, im_height]

    if self.letterbox_image:
        # Pad instead of stretch so the aspect ratio is preserved.
        image = np.array(
            letterbox_image(image,
                            [self.input_shape[1], self.input_shape[0]]),
            np.float32)
    else:
        self.anchors = Anchors(
            self.cfg, image_size=(im_height, im_width)).get_anchors()

    with torch.no_grad():
        # Normalise, reorder HWC -> CHW and add the batch axis.
        image = torch.from_numpy(
            preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)
        if self.cuda:
            self.anchors = self.anchors.cuda()
            image = image.cuda()

        loc, conf = self.net(image)

        # Decode the regression output against the anchors.
        boxes = decode(loc.data.squeeze(0), self.anchors,
                       self.cfg['variance'])
        boxes = boxes.cpu().numpy()
        scores = conf.data.squeeze(0)[:, 1:2].cpu().numpy()

        detections = np.concatenate([boxes, scores], -1)
        print(detections.shape)
        # Plain score threshold in place of non-max suppression.
        keep = detections[:, 4] >= 0.15
        detections = detections[keep]
        print(detections.shape)

        if len(detections) <= 0:
            return old_image

        if self.letterbox_image:
            # Remove the padding introduced by letterbox_image.
            detections = retinaface_correct_boxes(
                detections,
                np.array([self.input_shape[0], self.input_shape[1]]),
                np.array([im_height, im_width]))

    detections[:, :4] = detections[:, :4] * scale

    for det in detections:
        text = "{:.4f}".format(det[4])
        # Integer pixel values: b[0]..b[3] is the box, b[4] the (truncated)
        # score.
        b = list(map(int, det))
        cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cx = b[0]
        cy = b[1] + 12
        cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                    (255, 255, 255))
        print(b[0], b[1], b[2], b[3], b[4])
    return old_image
def detect_image(self, image_id, image):
    """Run SSD on one image and dump the detections to a text file.

    Results are written to ``./input/detection-results/<image_id>.txt``, one
    line per box: ``class score left top right bottom``. The file is always
    (re)created, so an image without detections produces an empty file.

    Args:
        image_id: Stem of the result file name.
        image: Image to analyse; ``.convert('RGB')`` is called on it in the
            non-letterbox branch, so presumably a PIL image -- confirm.

    Returns:
        None.
    """
    result_path = "./input/detection-results/" + image_id + ".txt"
    image_shape = np.array(np.shape(image)[0:2])

    # Either pad to the network size (no distortion) or resize directly.
    if self.letterbox_image:
        crop_img = np.array(
            letterbox_image(image,
                            (self.input_shape[1], self.input_shape[0])))
    else:
        crop_img = image.convert('RGB')
        crop_img = crop_img.resize(
            (self.input_shape[1], self.input_shape[0]), Image.BICUBIC)
    photo = np.array(crop_img, dtype=np.float64)

    # Subtract MEANS, reorder HWC -> CHW and add the batch axis.
    with torch.no_grad():
        photo = torch.from_numpy(
            np.expand_dims(np.transpose(photo - MEANS, (2, 0, 1)), 0)
        ).type(torch.FloatTensor)
        if self.cuda:
            photo = photo.cuda()
        preds = self.net(photo)

    # Tensor.numpy() only works on CPU tensors, so move the output once.
    preds = preds.detach().cpu()
    top_k = preds.size(2)

    # preds is indexed [batch, class, rank, 5]: slot 0 is the score and
    # slots 1..4 are xmin, ymin, xmax, ymax.
    top_conf, top_label, top_bboxes = [], [], []
    for i in range(preds.size(1)):
        j = 0
        # Bounded by top_k: without the bound the scan runs off the end of
        # the tensor when every slot of a class passes the threshold.
        while j < top_k and preds[0, i, j, 0] >= self.confidence:
            top_conf.append(preds[0, i, j, 0].item())
            # i - 1: presumably class index 0 is the background -- confirm.
            top_label.append(self.class_names[i - 1])
            top_bboxes.append(preds[0, i, j, 1:5].numpy())
            j += 1

    lines = []
    if top_conf:
        top_bboxes = np.array(top_bboxes)
        top_xmin = top_bboxes[:, 0:1]
        top_ymin = top_bboxes[:, 1:2]
        top_xmax = top_bboxes[:, 2:3]
        top_ymax = top_bboxes[:, 3:4]

        if self.letterbox_image:
            # Undo the letterbox padding.
            boxes = ssd_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.input_shape[0], self.input_shape[1]]),
                image_shape)
        else:
            # Direct resize: scale the coordinates by the image size.
            boxes = np.concatenate(
                [top_ymin * image_shape[0], top_xmin * image_shape[1],
                 top_ymax * image_shape[0], top_xmax * image_shape[1]],
                axis=-1)

        for predicted_class, score, box in zip(top_label, top_conf, boxes):
            top, left, bottom, right = box
            lines.append("%s %s %s %s %s %s\n" % (
                predicted_class, str(float(score))[:6],
                str(int(left)), str(int(top)),
                str(int(right)), str(int(bottom))))

    # The context manager closes the file on every path, including the
    # "no detections" one that used to leak the handle.
    with open(result_path, "w") as f:
        f.writelines(lines)
    return
def get_FPS(self, image, test_interval):
    """Return the average time in seconds of one SSD detection pass.

    One untimed pass (including preprocessing) is run first; afterwards the
    forward pass plus thresholding and box correction are repeated
    ``test_interval`` times on the same input tensor and timed.

    Args:
        image: Input image; it is handed to ``letterbox_image`` (defined
            elsewhere), so presumably a PIL image -- confirm.
        test_interval: Number of timed repetitions.

    Returns:
        Elapsed wall-clock time divided by ``test_interval``.
    """
    image_shape = np.array(np.shape(image)[0:2])
    # Resize with padding so the aspect ratio is preserved.
    crop_img = np.array(
        letterbox_image(image, (self.input_shape[1], self.input_shape[0])))

    def _decode(preds):
        """Threshold the predictions and map the boxes back to the image."""
        # Tensor.numpy() only works on CPU tensors.
        preds = preds.detach().cpu()
        top_k = preds.size(2)

        top_conf, top_label, top_bboxes = [], [], []
        for i in range(preds.size(1)):
            j = 0
            # Bounded by top_k: without the bound the scan runs off the end
            # of the tensor when every slot of a class passes the threshold.
            while j < top_k and preds[0, i, j, 0] >= self.confidence:
                top_conf.append(preds[0, i, j, 0].item())
                top_label.append(self.class_names[i - 1])
                top_bboxes.append(preds[0, i, j, 1:5].numpy())
                j += 1

        if not top_conf:
            return None

        # The results are thrown away by the caller; the work is done only
        # so that the timing covers the whole post-processing.
        top_conf = np.array(top_conf)
        top_label = np.array(top_label)
        top_bboxes = np.array(top_bboxes)
        boxes = ssd_correct_boxes(
            top_bboxes[:, 1:2], top_bboxes[:, 0:1],
            top_bboxes[:, 3:4], top_bboxes[:, 2:3],
            np.array([self.input_shape[0], self.input_shape[1]]),
            image_shape)
        return top_conf, top_label, boxes

    # Untimed first pass: preprocessing plus one full detection.
    with torch.no_grad():
        photo = torch.from_numpy(
            np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)), 0)
        ).type(torch.FloatTensor)
        if self.cuda:
            photo = photo.cuda()
        preds = self.net(photo)
    _decode(preds)

    t1 = time.time()
    for _ in range(test_interval):
        with torch.no_grad():
            preds = self.net(photo)
        _decode(preds)
    t2 = time.time()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def detect_image(self, image):
    """Detect objects with SSD and draw labelled boxes onto ``image``.

    Args:
        image: Image to analyse and draw on; it is passed to
            ``letterbox_image`` and ``ImageDraw.Draw``, so presumably a PIL
            image -- confirm.

    Returns:
        The same ``image`` object, with one rectangle and one text label per
        detection (unchanged when nothing reaches ``self.confidence``).
    """
    image_shape = np.array(np.shape(image)[0:2])

    # Distortion-free resize: grey bars are added around the image.
    crop_img = np.array(
        letterbox_image(image, (self.input_shape[1], self.input_shape[0])))

    with torch.no_grad():
        # Subtract MEANS, reorder HWC -> CHW and add the batch axis.
        photo = torch.from_numpy(
            np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)), 0)
        ).type(torch.FloatTensor)
        if self.cuda:
            photo = photo.cuda()
        preds = self.net(photo)

    # Tensor.numpy() only works on CPU tensors, so move the output once.
    preds = preds.detach().cpu()
    top_k = preds.size(2)

    # preds has shape (1, num_classes, top_k, 5): slot 0 is the score and
    # slots 1..4 are the top-left / bottom-right corners.
    top_conf, top_label, top_bboxes = [], [], []
    for i in range(preds.size(1)):
        j = 0
        # Bounded by top_k: without the bound the scan runs off the end of
        # the tensor when every slot of a class passes the threshold.
        while j < top_k and preds[0, i, j, 0] >= self.confidence:
            top_conf.append(preds[0, i, j, 0].item())
            # i - 1: presumably class index 0 is the background -- confirm.
            top_label.append(self.class_names[i - 1])
            top_bboxes.append(preds[0, i, j, 1:5].numpy())
            j += 1

    # Nothing above the threshold: hand back the untouched image.
    if not top_conf:
        return image

    top_bboxes = np.array(top_bboxes)
    # Remove the grey bars; rows come back as top, left, bottom, right.
    boxes = ssd_correct_boxes(
        top_bboxes[:, 1:2], top_bboxes[:, 0:1],
        top_bboxes[:, 3:4], top_bboxes[:, 2:3],
        np.array([self.input_shape[0], self.input_shape[1]]), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
    thickness = max(
        (np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0], 1)

    draw = ImageDraw.Draw(image)
    for predicted_class, score, box in zip(top_label, top_conf, boxes):
        top, left, bottom, right = box

        # Grow the box by 5 px on each side, round and clamp to the image.
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0],
                     np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1],
                    np.floor(right + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        # NOTE(review): ImageDraw.textsize was removed in Pillow 10; confirm
        # the Pillow version this project pins.
        label_size = draw.textsize(label, font)
        print(label.encode('utf-8'), top, left, bottom, right)

        # Put the label above the box when it fits, otherwise just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        color = self.colors[self.class_names.index(predicted_class)]
        # Nested 1 px rectangles emulate a thick outline.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset,
                 right - offset, bottom - offset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw
    return image
def get_FPS(self, image, test_interval):
    """Return the average time in seconds of one SSD detection pass.

    One untimed pass (including preprocessing) is run first; afterwards the
    forward pass plus thresholding and box correction are repeated
    ``test_interval`` times on the same input tensor and timed.

    Args:
        image: Input image; ``.convert('RGB')`` is called on it in the
            non-letterbox branch, so presumably a PIL image -- confirm.
        test_interval: Number of timed repetitions.

    Returns:
        Elapsed wall-clock time divided by ``test_interval``.
    """
    image_shape = np.array(np.shape(image)[0:2])

    # Either pad to the network size (no distortion) or resize directly.
    if self.letterbox_image:
        crop_img = np.array(
            letterbox_image(image,
                            (self.input_shape[1], self.input_shape[0])))
    else:
        crop_img = image.convert('RGB')
        crop_img = crop_img.resize(
            (self.input_shape[1], self.input_shape[0]), Image.BICUBIC)
    photo = np.array(crop_img, dtype=np.float64)

    def _decode(preds):
        """Threshold the predictions and map the boxes back to the image."""
        # Tensor.numpy() only works on CPU tensors.
        preds = preds.detach().cpu()
        top_k = preds.size(2)

        top_conf, top_label, top_bboxes = [], [], []
        for i in range(preds.size(1)):
            j = 0
            # Bounded by top_k: without the bound the scan runs off the end
            # of the tensor when every slot of a class passes the threshold.
            while j < top_k and preds[0, i, j, 0] >= self.confidence:
                top_conf.append(preds[0, i, j, 0].item())
                top_label.append(self.class_names[i - 1])
                top_bboxes.append(preds[0, i, j, 1:5].numpy())
                j += 1

        if not top_conf:
            return None

        # The results are thrown away by the caller; the work is done only
        # so that the timing covers the whole post-processing.
        top_conf = np.array(top_conf)
        top_label = np.array(top_label)
        top_bboxes = np.array(top_bboxes)
        top_xmin = top_bboxes[:, 0:1]
        top_ymin = top_bboxes[:, 1:2]
        top_xmax = top_bboxes[:, 2:3]
        top_ymax = top_bboxes[:, 3:4]

        if self.letterbox_image:
            # Undo the letterbox padding.
            boxes = ssd_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.input_shape[0], self.input_shape[1]]),
                image_shape)
        else:
            # Direct resize: scale the coordinates by the image size.
            boxes = np.concatenate(
                [top_ymin * image_shape[0], top_xmin * image_shape[1],
                 top_ymax * image_shape[0], top_xmax * image_shape[1]],
                axis=-1)
        return top_conf, top_label, boxes

    # Untimed first pass: preprocessing plus one full detection.
    with torch.no_grad():
        photo = torch.from_numpy(
            np.expand_dims(np.transpose(photo - MEANS, (2, 0, 1)), 0)
        ).type(torch.FloatTensor)
        if self.cuda:
            photo = photo.cuda()
        preds = self.net(photo)
    _decode(preds)

    t1 = time.time()
    for _ in range(test_interval):
        with torch.no_grad():
            preds = self.net(photo)
        _decode(preds)
    t2 = time.time()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def detect_image(self, image):
    """Return the RetinaFace detections for ``image`` as an array.

    Each row holds the box in columns 0..3, the score in column 4 and ten
    landmark coordinates from column 5 on, all scaled to the pixel size of
    the input image.

    Args:
        image: Input image, converted to a float32 array of shape (h, w, c).

    Returns:
        The detections array, or an empty ``np.array([])`` when nothing
        survives non-max suppression.
    """
    # Very low threshold. NOTE: this overwrites the instance setting.
    self.confidence = 0.02

    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    # (width, height) pairs used to scale boxes (2 points) and landmarks
    # (5 points) back to pixel coordinates of the input image.
    box_scale = [im_width, im_height] * 2
    landmark_scale = [im_width, im_height] * 5

    if self.letterbox_image:
        # Pad instead of stretch so the aspect ratio is preserved.
        image = np.array(
            letterbox_image(image,
                            [self.input_shape[1], self.input_shape[0]]),
            np.float32)
    else:
        self.anchors = Anchors(
            self.cfg, image_size=(im_height, im_width)).get_anchors()

    with torch.no_grad():
        # Normalise, reorder HWC -> CHW and add the batch axis.
        image = torch.from_numpy(
            preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)
        if self.cuda:
            self.anchors = self.anchors.cuda()
            image = image.cuda()

        loc, conf, landms = self.net(image)

        # Decode the raw outputs against the anchors.
        boxes = decode(loc.data.squeeze(0), self.anchors,
                       self.cfg['variance'])
        boxes = boxes.cpu().numpy()
        scores = conf.data.squeeze(0)[:, 1:2].cpu().numpy()
        points = decode_landm(landms.data.squeeze(0), self.anchors,
                              self.cfg['variance'])
        points = points.cpu().numpy()

        detections = np.concatenate([boxes, scores, points], -1)
        detections = non_max_suppression(detections, self.confidence)
        if len(detections) <= 0:
            return np.array([])

        if self.letterbox_image:
            # Remove the padding introduced by letterbox_image.
            detections = retinaface_correct_boxes(
                detections,
                np.array([self.input_shape[0], self.input_shape[1]]),
                np.array([im_height, im_width]))

    detections[:, :4] = detections[:, :4] * box_scale
    detections[:, 5:] = detections[:, 5:] * landmark_scale
    return detections
def run(self):
    """Detect objects in ``self.image``, draw them and report via signals.

    Progress text is emitted through ``self.msg``. When at least one object
    is found, the annotated image is saved to
    ``neural_network/img/predict_output.png`` and that path is emitted
    through ``self.set_image``; otherwise an empty string is emitted.
    """
    # Running number of the detections; also shown in each label.
    count = 0
    image_shape = np.array(np.shape(self.image)[0:2])
    model_size = (self.Config["model_image_size"][0],
                  self.Config["model_image_size"][1])
    confidence = self.Config["confidence"]

    # Resize with grey padding so the aspect ratio is preserved.
    crop_img = np.array(letterbox_image(self.image, model_size))

    # Subtract MEANS, reorder HWC -> CHW and add the batch axis. Inference
    # needs no gradients, so the autograd graph is not built.
    with torch.no_grad():
        photo = torch.from_numpy(
            np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)), 0)
        ).type(torch.FloatTensor)
        preds = self.net(photo)

    # Tensor.numpy() only works on CPU tensors, so move the output once.
    preds = preds.detach().cpu()
    top_k = preds.size(2)

    # preds is indexed [batch, class, rank, 5]: slot 0 is the score and
    # slots 1..4 are xmin, ymin, xmax, ymax.
    top_conf, top_label, top_bboxes = [], [], []
    for i in range(preds.size(1)):
        j = 0
        # Bounded by top_k: without the bound the scan runs off the end of
        # the tensor when every slot of a class passes the threshold.
        while j < top_k and preds[0, i, j, 0] >= confidence:
            top_conf.append(preds[0, i, j, 0].item())
            # i - 1: presumably class index 0 is the background -- confirm.
            top_label.append(self.class_names[i - 1])
            top_bboxes.append(preds[0, i, j, 1:5].numpy())
            j += 1

    if not top_conf:
        self.msg.emit("图中一共有:0 个 目标对象")
        self.set_image.emit("")
        self.msg.emit("预测完成")
        return

    top_bboxes = np.array(top_bboxes)
    # Remove the grey bars; rows come back as top, left, bottom, right.
    boxes = ssd_correct_boxes(
        top_bboxes[:, 1:2], top_bboxes[:, 0:1],
        top_bboxes[:, 3:4], top_bboxes[:, 2:3],
        np.array([model_size[0], model_size[1]]), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * np.shape(self.image)[1] + 0.5).astype('int32'))
    # At least one pixel, otherwise small images get no outline at all.
    thickness = max(
        (np.shape(self.image)[0] + np.shape(self.image)[1]) // model_size[0],
        1)

    draw = ImageDraw.Draw(self.image)
    for predicted_class, score, box in zip(top_label, top_conf, boxes):
        count += 1
        top, left, bottom, right = box

        # Grow the box by 5 px on each side, round and clamp to the image.
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(self.image)[0],
                     np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(self.image)[1],
                    np.floor(right + 0.5).astype('int32'))

        label = '{} {:.2f}-{}'.format(predicted_class, score, count)
        # NOTE(review): ImageDraw.textsize was removed in Pillow 10; confirm
        # the Pillow version this project pins.
        label_size = draw.textsize(label, font)
        self.msg.emit(label)

        # Put the label above the box when it fits, otherwise just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        color = self.colors[self.class_names.index(predicted_class)]
        # Nested 1 px rectangles emulate a thick outline.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset,
                 right - offset, bottom - offset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw

    self.msg.emit("图中一共有:" + str(count) + " 个 目标对象")
    # Output location of the annotated image (can be changed).
    self.path = "neural_network/img/predict_output.png"
    self.image.save(self.path)
    self.set_image.emit(self.path)
    self.msg.emit("预测完成")
def detect_image(self, image):
    """Detect objects with SSD and draw labelled boxes onto ``image``.

    Args:
        image: Image to analyse and draw on; it is passed to
            ``letterbox_image`` and ``ImageDraw.Draw``, so presumably a PIL
            image -- confirm.

    Returns:
        The same ``image`` object, with one rectangle and one text label per
        detection (unchanged when nothing reaches ``self.confidence``).
    """
    # (height, width) of the source image.
    image_shape = np.array(np.shape(image)[0:2])
    model_size = (self.model_image_size[0], self.model_image_size[1])

    # Fit the image to the network size while keeping its aspect ratio; the
    # remaining area is filled with grey.
    crop_img = np.array(letterbox_image(image, model_size))

    # No gradients are needed for inference.
    with torch.no_grad():
        # Subtract MEANS, move the colour channel to the front and add the
        # batch axis.
        photo = torch.from_numpy(
            np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)), 0)
        ).type(torch.FloatTensor)
        if self.cuda:
            photo = photo.cuda()
        preds = self.net(photo)

    # Tensor.numpy() only works on CPU tensors, so move the output once.
    preds = preds.detach().cpu()
    top_k = preds.size(2)

    # preds is indexed [batch, class, rank, 5]: one confidence followed by
    # four position values per candidate.
    top_conf = []    # confidence of every kept box
    top_label = []   # class name of every kept box
    top_bboxes = []  # position of every kept box
    # Start at 1 to skip class index 0, which the original comments describe
    # as the background. That is also why the label is class_names[i - 1].
    for i in range(1, preds.size(1)):
        j = 0
        # Bounded by top_k: without the bound the scan runs off the end of
        # the tensor when every slot of a class passes the threshold.
        while j < top_k and preds[0, i, j, 0] >= self.confidence:
            top_conf.append(preds[0, i, j, 0].item())
            top_label.append(self.class_names[i - 1])
            top_bboxes.append(preds[0, i, j, 1:5].numpy())
            j += 1

    # Nothing detected: hand back the untouched image.
    if not top_conf:
        return image

    top_bboxes = np.array(top_bboxes)
    # Remove the grey bars; rows come back as top, left, bottom, right.
    boxes = ssd_correct_boxes(
        top_bboxes[:, 1:2], top_bboxes[:, 0:1],
        top_bboxes[:, 3:4], top_bboxes[:, 2:3],
        np.array([model_size[0], model_size[1]]), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
    # At least one pixel, otherwise small images get no outline at all.
    thickness = max(
        (np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0],
        1)

    draw = ImageDraw.Draw(image)
    for predicted_class, score, box in zip(top_label, top_conf, boxes):
        top, left, bottom, right = box

        # Grow the box by 5 px on each side, round and clamp to the image.
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0],
                     np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1],
                    np.floor(right + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        # NOTE(review): ImageDraw.textsize was removed in Pillow 10; confirm
        # the Pillow version this project pins.
        label_size = draw.textsize(label, font)
        print(label.encode('utf-8'))

        # Put the label above the box when it fits, otherwise just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        color = self.colors[self.class_names.index(predicted_class)]
        # Nested 1 px rectangles emulate a thick outline.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset,
                 right - offset, bottom - offset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw
    return image
def detect_image(self, image, picpath, frameNo):
    """Detect faces in one frame, annotate it, and save a crop per face.

    The frame is run through ``self.net``; the decoded boxes, scores and
    landmarks are filtered with ``non_max_suppression`` at
    ``self.confidence``. Each surviving detection is drawn on a copy of the
    frame (red box plus score text) and a crop around it is written to
    ``picpath + "/" + str(frameNo) + "_" + str(index) + ".jpg"``.

    Args:
        image: Input frame of shape (height, width, channels). It is drawn
            on with ``cv2`` and sliced, so presumably a NumPy array as
            returned by OpenCV -- TODO confirm against the caller.
        picpath: Directory prefix under which face crops are written. It is
            not created here.
        frameNo: Frame identifier used as the crop file-name prefix.

    Returns:
        ``(False, annotated_copy)`` when no detection survives NMS, else
        ``(True, annotated_copy)``.
    """
    # Keep a copy of the input; all drawing and cropping happens on it.
    old_image = image.copy()
    image = np.array(image, np.float32)

    # Factors that map the decoded box / landmark coordinates onto the
    # original frame size (x -> width, y -> height).
    im_height, im_width, _ = np.shape(image)
    scale = [im_width, im_height, im_width, im_height]
    scale_for_landmarks = [im_width, im_height] * 5

    # letterbox_image pads the frame so it can be resized without
    # distortion; otherwise the anchors are rebuilt for this frame size.
    if self.letterbox_image:
        image = np.array(
            letterbox_image(image, [self.input_shape[1], self.input_shape[0]]),
            np.float32)
    else:
        self.anchors = Anchors(
            self.cfg, image_size=(im_height, im_width)).get_anchors()

    with torch.no_grad():
        # Pre-process, move channels first and add the batch dimension.
        image = torch.from_numpy(
            preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)
        if self.cuda:
            self.anchors = self.anchors.cuda()
            image = image.cuda()

        loc, conf, landms = self.net(image)

        # Decode the raw network outputs.
        boxes = decode(loc.data.squeeze(0), self.anchors, self.cfg['variance'])
        boxes = boxes.cpu().numpy()
        conf = conf.data.squeeze(0)[:, 1:2].cpu().numpy()
        landms = decode_landm(
            landms.data.squeeze(0), self.anchors, self.cfg['variance'])
        landms = landms.cpu().numpy()

    # One row per candidate: 4 box values, 1 score, 10 landmark values.
    boxes_conf_landms = np.concatenate([boxes, conf, landms], -1)
    boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)
    if len(boxes_conf_landms) <= 0:
        return False, old_image

    # When letterboxing was used, remove the padded borders again.
    if self.letterbox_image:
        boxes_conf_landms = retinaface_correct_boxes(
            boxes_conf_landms,
            np.array([self.input_shape[0], self.input_shape[1]]),
            np.array([im_height, im_width]))

    boxes_conf_landms[:, :4] = boxes_conf_landms[:, :4] * scale
    boxes_conf_landms[:, 5:] = boxes_conf_landms[:, 5:] * scale_for_landmarks

    margin = 50  # extra pixels kept around each face crop
    for num, detection in enumerate(boxes_conf_landms):
        text = "{:.4f}".format(detection[4])
        # b[0]-b[3]: box corners, b[4]: truncated score, b[5]-b[14]: landmarks.
        b = list(map(int, detection))

        cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cx = b[0]
        cy = b[1] + 12
        cv2.putText(old_image, text, (cx, cy),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
        print(b[0], b[1], b[2], b[3], b[4])

        # Grow the box by `margin` on every side when the grown box stays at
        # positive coordinates; otherwise keep the box and clamp negatives.
        # Only the four box coordinates are tested: the previous test also
        # covered b[4] (int(score), i.e. 0 for any score below 1) and the
        # landmarks, so it practically never passed and the margin was
        # silently dropped.
        expanded = [b[0] - margin, b[1] - margin, b[2] + margin, b[3] + margin]
        if all(value > 0 for value in expanded):
            b[:4] = expanded
        else:
            for i, ind in enumerate(b[:4]):
                if ind < 0:
                    print(i, b[i])
                    b[i] = 0
        print(b[0], b[1], b[2], b[3], b[4])

        # Size of the face window.
        Height = b[3] - b[1]
        Width = b[2] - b[0]
        print("窗口大小 / The size of window:" , '\n', "高度 / height:", Height , '\n', "宽度 / width: ", Width)

        # NOTE(review): the crop is taken from the annotated copy, so it
        # includes the rectangle / score text drawn above -- confirm that
        # this is wanted.
        img_blank = old_image[int(b[1]):int(b[3]), int(b[0]):int(b[2])]
        savepath = picpath + "/" + str(frameNo) + "_" + str(num) + ".jpg"

        # A degenerate or off-image box yields an empty slice, which
        # cv2.imwrite rejects with an exception; skip it instead of
        # aborting the whole frame.
        if img_blank.size == 0:
            print("Skip empty crop:", savepath)
            continue

        # cv2.imwrite reports failure (e.g. missing directory) through its
        # return value rather than by raising.
        if cv2.imwrite(savepath, img_blank):
            print("Save into:", savepath)
        else:
            print("Failed to save:", savepath)

    return True, old_image