def detect_image(self, image_id, image):
    """Detect objects in ``image`` and write them to a results text file.

    One line per kept detection is written to
    ``./input/detection-results/<image_id>.txt`` in the form
    ``<class> <score> <left> <top> <right> <bottom>``, where the score is
    truncated to its first six characters and coordinates are cast to int.

    Side effect: ``self.confidence`` is overwritten with 0.05.

    Args:
        image_id: Name (without extension) used for the output file.
        image: Input image; passed to ``letterbox_image`` and ``np.shape``.

    Returns:
        ``image`` if the first entry of the ``non_max_suppression`` result
        cannot be converted to a NumPy array (presumably "nothing detected"
        - confirm against that helper); otherwise ``None``.
    """
    self.confidence = 0.05
    model_size = (self.model_image_size[0], self.model_image_size[1])

    # The context manager closes the file on every exit path, including the
    # early return below (the previous open()/close() pair leaked it there).
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])
        crop_img = np.array(letterbox_image(image, model_size))

        # Scale to [0, 1], move channels first, add a leading batch axis.
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

            # This variant decodes two network outputs.
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(2)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence, nms_thres=0.3)

        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            # Best-effort: no usable detections for this image.
            return image

        # Score is column 4 times column 5; keep rows above the threshold.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = (
            np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

        # Remove the gray bars (map boxes back onto the original image).
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array(model_size), image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" % (
                predicted_class, score[:6],
                str(int(left)), str(int(top)),
                str(int(right)), str(int(bottom))))
    return
def predict(self, image):
    """Run the detector on ``image`` and return the raw detection arrays.

    Args:
        image: Input image; passed to ``letterbox_image`` and ``np.shape``.

    Returns:
        Tuple ``(boxes, top_label, top_conf)``: the output of
        ``yolo_correct_boxes``, the int32 class indices and the scores
        (column 4 times column 5) of the detections whose score exceeds
        ``self.confidence``.
    """
    net_size = (self.input_shape[0], self.input_shape[1])
    image_shape = np.array(np.shape(image)[0:2])

    # Letterbox, scale to [0, 1], channels first, then add the batch axis.
    letterboxed = np.array(letterbox_image(image, net_size))
    tensor_in = np.array(letterboxed, dtype=np.float32)
    tensor_in /= 255.0
    tensor_in = np.transpose(tensor_in, (2, 0, 1)).astype(np.float32)
    batch = np.asarray([tensor_in])

    with torch.no_grad():
        batch = torch.from_numpy(batch)
        if self.cuda:
            batch = batch.cuda()
        raw_outputs = self.net(batch)

        decoded = [self.yolo_decodes[k](raw_outputs[k]) for k in range(3)]
        stacked = torch.cat(decoded, 1)
        detections = non_max_suppression(
            stacked, len(self.class_names),
            conf_thres=self.confidence, nms_thres=0.3)

    detections = detections[0].cpu().numpy()

    # Keep only detections whose combined score beats the threshold.
    combined = detections[:, 4] * detections[:, 5]
    keep = combined > self.confidence
    top_conf = combined[keep]
    top_label = np.array(detections[keep, -1], np.int32)
    kept_boxes = np.array(detections[keep, :4])
    x_min, y_min, x_max, y_max = (
        np.expand_dims(kept_boxes[:, k], -1) for k in range(4))

    # Remove the gray bars.
    boxes = yolo_correct_boxes(
        y_min, x_min, y_max, x_max, np.array(net_size), image_shape)
    return boxes, top_label, top_conf
def detect_image(self, image):
    """Detect objects in ``image`` and draw labelled boxes onto it.

    Each kept detection is padded by 5 pixels on every side, rounded,
    clamped to the image bounds, outlined in its class colour and tagged
    with ``"<class> <score>"``. The UTF-8 encoded label is also printed.

    Args:
        image: Image to annotate in place; it is handed to
            ``ImageDraw.Draw`` (assumed to be a PIL image - confirm).

    Returns:
        The same ``image`` object, annotated. It is returned unchanged if
        the first entry of the ``non_max_suppression`` result cannot be
        converted to a NumPy array.
    """
    model_size = (self.model_image_size[0], self.model_image_size[1])
    img_h, img_w = np.shape(image)[0:2]
    image_shape = np.array([img_h, img_w])

    crop_img = np.array(letterbox_image(image, model_size))
    photo = np.array(crop_img, dtype=np.float32)
    photo /= 255.0
    photo = np.transpose(photo, (2, 0, 1))
    photo = photo.astype(np.float32)
    images = np.asarray([photo])

    with torch.no_grad():
        images = torch.from_numpy(images)
        if self.cuda:
            images = images.cuda()
        outputs = self.net(images)

        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(
            output, len(self.class_names),
            conf_thres=self.confidence, nms_thres=0.3)

    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except Exception:
        # Best-effort: nothing usable was detected, hand back the input.
        return image

    scores = batch_detections[:, 4] * batch_detections[:, 5]
    top_index = scores > self.confidence
    top_conf = scores[top_index]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

    # Remove the gray bars.
    boxes = yolo_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array(model_size), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * img_w + 0.5).astype('int32'))
    # Clamp to at least 1 so small images still get a visible outline
    # (the integer division alone yields 0 for them).
    thickness = max((img_h + img_w) // self.model_image_size[0], 1)

    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]
        color = self.colors[self.class_names.index(predicted_class)]

        top, left, bottom, right = boxes[i]
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

        # Draw the box.
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label)

        # Put the label above the box when there is room, else just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Nested one-pixel rectangles build up the outline thickness.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset, right - offset, bottom - offset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
        del draw
    return image
def detect_image(self, image, aligned_depth_frame=None, color_intrin_part=None, mode=1):
    """Detect objects, estimate their distance, and annotate ``image``.

    For every kept detection the padded, clamped box is drawn with a label
    ``"<class> <score>\\n<distance> m"`` and a summary line is printed.

    Depth is only measured when both ``aligned_depth_frame`` and
    ``color_intrin_part`` are truthy and ``mode == 1``: the depth at the
    box centre (clamped to ``[1, width - 1]`` x ``[1, height - 1]``) is read
    with ``aligned_depth_frame.get_distance`` and the centre pixel is
    converted with ``(pixel - pp) * depth / f``. In every other case the
    detection is reported with a depth of 0 and coordinates ``[0.0, 0.0]``
    instead of raising ``NameError`` on unassigned variables.

    Args:
        image: Image to annotate in place; handed to ``ImageDraw.Draw``
            (assumed to be a PIL image - confirm).
        aligned_depth_frame: Object exposing ``width``, ``height`` and
            ``get_distance(x, y)``; ``None`` disables depth lookup.
        color_intrin_part: Indexable holding ``ppx, ppy, fx, fy`` at
            indices 0-3.
        mode: Depth strategy; only ``1`` (centre pixel) is implemented.

    Returns:
        The same ``image`` object, annotated. It is returned unchanged if
        the first entry of the ``non_max_suppression`` result cannot be
        converted to a NumPy array.
    """
    model_size = (self.model_image_size[0], self.model_image_size[1])
    img_h, img_w = np.shape(image)[0:2]
    image_shape = np.array([img_h, img_w])

    crop_img = np.array(letterbox_image(image, model_size))
    photo = np.array(crop_img, dtype=np.float32)
    photo /= 255.0
    photo = np.transpose(photo, (2, 0, 1))
    photo = photo.astype(np.float32)
    images = np.asarray([photo])

    with torch.no_grad():
        images = torch.from_numpy(images)
        if self.cuda:
            images = images.cuda()
        outputs = self.net(images)

        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(
            output, len(self.class_names),
            conf_thres=self.confidence, nms_thres=0.3)

    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except Exception:
        # Best-effort: nothing usable was detected, hand back the input.
        return image

    scores = batch_detections[:, 4] * batch_detections[:, 5]
    top_index = scores > self.confidence
    top_conf = scores[top_index]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

    # Remove the gray bars.
    boxes = yolo_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array(model_size), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * img_w + 0.5).astype('int32'))
    # Clamp to at least 1 so small images still get a visible outline.
    thickness = max((img_h + img_w) // self.model_image_size[0], 1)

    use_depth = bool(aligned_depth_frame and color_intrin_part) and mode == 1

    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]
        color = self.colors[self.class_names.index(predicted_class)]

        top, left, bottom, right = boxes[i]
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

        center_x = int(round((left + right) / 2))
        center_y = int(round((top + bottom) / 2))

        # Fallback values used whenever depth cannot be measured, so the
        # label and the summary print below are always well defined.
        target_xy_pixel = [center_x, center_y]
        target_depth = 0.0
        target_xy_true = [0.0, 0.0]
        strDistance = "\n 0 m"

        if use_depth:
            ppx = color_intrin_part[0]
            ppy = color_intrin_part[1]
            fx = color_intrin_part[2]
            fy = color_intrin_part[3]
            width = aligned_depth_frame.width
            height = aligned_depth_frame.height

            # Mode 1: take the depth at the (clamped) box centre pixel.
            center_x = min(max(1, center_x), width - 1)
            center_y = min(max(1, center_y), height - 1)
            target_xy_pixel = [center_x, center_y]
            target_depth = aligned_depth_frame.get_distance(
                target_xy_pixel[0], target_xy_pixel[1])
            strDistance = "\n%.2f m" % target_depth
            target_xy_true = [
                (target_xy_pixel[0] - ppx) * target_depth / fx,
                (target_xy_pixel[1] - ppy) * target_depth / fy,
            ]
        # NOTE: modes 2-4 (box average, 10%-trimmed average, average that
        # skips zero-depth pixels) existed only as commented-out code.

        # Draw the box.
        label = '{} {:.2f}'.format(predicted_class, score)
        label = label + strDistance
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print('检测出目标:{} ;实际坐标为(m):({:.3f}, {:.3f}, {:.3f}) \n中心点像素坐标(pixel):({}, {}) ;中心点相机坐标(m):({},{});深度: {} m\n'.format(
            predicted_class,
            target_xy_true[0], target_xy_true[1], target_depth,
            target_xy_pixel[0], target_xy_pixel[1],
            target_xy_true[0], target_xy_true[1], target_depth))

        # Put the label above the box when there is room, else just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset, right - offset, bottom - offset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
        del draw
    return image
def get_FPS(self, image, test_interval):
    """Measure the average duration of one detection pass on ``image``.

    The image is preprocessed once. One untimed pass (network forward,
    decoding, non-maximum suppression and box correction) is run first,
    then ``test_interval`` identical passes are timed with ``time.time``.

    Args:
        image: Input image; passed to ``letterbox_image`` and ``np.shape``.
        test_interval: Number of timed passes.

    Returns:
        Mean wall-clock seconds per timed pass.

    Raises:
        ZeroDivisionError: If ``test_interval`` is 0.
    """
    image_shape = np.array(np.shape(image)[0:2])
    model_shape = np.array(
        [self.model_image_size[0], self.model_image_size[1]])

    # Add gray bars to the image for a distortion-free resize.
    crop_img = np.array(letterbox_image(
        image, (self.model_image_size[1], self.model_image_size[0])))
    photo = np.array(crop_img, dtype=np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))

    # Add the batch_size dimension.
    with torch.no_grad():
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

    def run_pass():
        """Run one full forward + post-processing pass, discarding results."""
        with torch.no_grad():
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence, nms_thres=self.iou)
            try:
                detections = batch_detections[0].cpu().numpy()
                scores = detections[:, 4] * detections[:, 5]
                top_index = scores > self.confidence
                top_bboxes = np.array(detections[top_index, :4])
                top_xmin, top_ymin, top_xmax, top_ymax = (
                    np.expand_dims(top_bboxes[:, k], -1) for k in range(4))
                # Remove the gray bars.
                yolo_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    model_shape, image_shape)
            except Exception:
                # Post-processing is best-effort here (e.g. no detections).
                # Exception, not a bare except, so Ctrl-C still interrupts
                # a long benchmark run.
                pass

    run_pass()

    t1 = time.time()
    for _ in range(test_interval):
        run_pass()
    t2 = time.time()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def detect_image(self, image_id, image):
    """Detect objects in ``image`` and write them to a results text file.

    One line per kept detection is written to
    ``./input/detection-results/<image_id>.txt`` in the form
    ``<class> <score> <left> <top> <right> <bottom>``, where the score is
    truncated to its first six characters and coordinates are cast to int.

    Side effects: ``self.confidence`` is set to 0.01 and ``self.iou`` to 0.5.

    Args:
        image_id: Name (without extension) used for the output file.
        image: Input image. When ``self.letterbox_image`` is falsy it must
            provide ``convert`` and ``resize`` (assumed PIL image - confirm).

    Returns:
        ``None`` in all cases. The file is left empty when the first entry
        of the ``non_max_suppression`` result cannot be converted to a
        NumPy array.
    """
    self.confidence = 0.01
    self.iou = 0.5
    model_h = self.model_image_size[0]
    model_w = self.model_image_size[1]

    # The context manager closes the file on every exit path, including the
    # early return below (the previous open()/close() pair leaked it there).
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])

        # Add gray bars to the image for a distortion-free resize;
        # alternatively resize directly for recognition.
        if self.letterbox_image:
            crop_img = np.array(letterbox_image(image, (model_w, model_h)))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize((model_w, model_h), Image.BICUBIC)
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))

        # Add the batch_size dimension.
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            # Feed the image into the network for prediction.
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]

            # Stack the predicted boxes, then apply non-maximum suppression.
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, self.num_classes,
                conf_thres=self.confidence, nms_thres=self.iou)

        # If no object was detected, return.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            return

        # Filter the predicted boxes by score.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = (
            np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

        # letterbox_image pads the image with gray bars before prediction,
        # so top_bboxes are relative to the padded image and must be
        # corrected to remove the gray-bar part.
        if self.letterbox_image:
            boxes = yolo_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([model_h, model_w]), image_shape)
        else:
            # Plain resize: rescale from network size to image size.
            top_xmin = top_xmin / model_w * image_shape[1]
            top_ymin = top_ymin / model_h * image_shape[0]
            top_xmax = top_xmax / model_w * image_shape[1]
            top_ymax = top_ymax / model_h * image_shape[0]
            boxes = np.concatenate(
                [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = str(top_conf[i])
            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" % (
                predicted_class, score[:6],
                str(int(left)), str(int(top)),
                str(int(right)), str(int(bottom))))
    return
def detect_image(self, image):
    """Detect objects in ``image`` and draw labelled boxes onto it.

    Every kept detection is padded by 5 pixels, rounded, clamped to the
    image, outlined in its class colour (at least one pixel thick) and
    tagged with ``"<class> <score>"``. The encoded label and the box
    coordinates are printed.

    Args:
        image: Image to annotate in place; handed to ``ImageDraw.Draw``
            (assumed to be a PIL image - confirm).

    Returns:
        The same ``image`` object. It is returned unchanged if extracting
        the first ``non_max_suppression`` entry raises.
    """
    net_h = self.model_image_size[0]
    net_w = self.model_image_size[1]
    img_h, img_w = np.shape(image)[0:2]
    image_shape = np.array([img_h, img_w])

    # Add gray bars to the image for a distortion-free resize.
    letterboxed = np.array(letterbox_image(image, (net_w, net_h)))
    chw = np.transpose(
        np.array(letterboxed, dtype=np.float32) / 255.0, (2, 0, 1))

    with torch.no_grad():
        # Add the batch_size dimension.
        batch = torch.from_numpy(np.asarray([chw]))
        if self.cuda:
            batch = batch.cuda()

        # Feed the image into the network for prediction.
        raw_outputs = self.net(batch)
        decoded = []
        for head in range(3):
            decoded.append(self.yolo_decodes[head](raw_outputs[head]))

        # Stack the predicted boxes, then apply non-maximum suppression.
        detections = non_max_suppression(
            torch.cat(decoded, 1), len(self.class_names),
            conf_thres=self.confidence, nms_thres=self.iou)

    # If no object was detected, return the original image.
    # NOTE: the catch-all handler is kept deliberately to preserve behaviour.
    try:
        detections = detections[0].cpu().numpy()
    except:
        return image

    # Filter the predicted boxes by score.
    combined = detections[:, 4] * detections[:, 5]
    keep = combined > self.confidence
    top_conf = combined[keep]
    top_label = np.array(detections[keep, -1], np.int32)
    kept_boxes = np.array(detections[keep, :4])
    x_min, y_min, x_max, y_max = (
        np.expand_dims(kept_boxes[:, k], -1) for k in range(4))

    # The boxes are relative to the gray-bar padded image; correct them to
    # remove the gray-bar part.
    boxes = yolo_correct_boxes(
        y_min, x_min, y_max, x_max, np.array([net_h, net_w]), image_shape)

    font_size = np.floor(3e-2 * img_w + 0.5).astype('int32')
    font = ImageFont.truetype(font='model_data/simhei.ttf', size=font_size)
    thickness = max((img_h + img_w) // net_h, 1)

    for idx, class_id in enumerate(top_label):
        predicted_class = self.class_names[class_id]
        score = top_conf[idx]
        color = self.colors[self.class_names.index(predicted_class)]

        top, left, bottom, right = boxes[idx]
        top, left, bottom, right = top - 5, left - 5, bottom + 5, right + 5
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 0.5).astype('int32'))

        # Draw the box.
        text = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(text, font)
        encoded = text.encode('utf-8')
        print(encoded, top, left, bottom, right)

        # Label goes above the box when it fits, otherwise just inside it.
        fits_above = top - label_size[1] >= 0
        text_origin = np.array(
            [left, top - label_size[1]] if fits_above else [left, top + 1])

        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, str(encoded, 'UTF-8'),
                  fill=(0, 0, 0), font=font)
        del draw
    return image
def detect_image(self, image):
    """Detect objects, save a crop of each box, and annotate ``image``.

    For every kept detection the 5-pixel padded box is cropped from the
    image and saved to ``lena2.jpg`` (the same file for every detection, so
    only the last crop survives). The box is then floored, clamped, outlined
    with ``self.colors[c]`` and labelled with a display name, a confidence
    caption and the score.

    Args:
        image: Image to annotate in place; must provide ``crop`` and is
            handed to ``ImageDraw.Draw`` (assumed PIL image - confirm).

    Returns:
        The same ``image`` object. It is returned unchanged if extracting
        the first ``non_max_suppression`` entry raises.
    """
    # Display names for known classes; anything else uses the fallback.
    display_names = {
        'person': "Ren",
        'chair': "椅子",
        'clock': "钟",
        'tie': "厂牌吗??",
        'cell phone': "手机",
        'laptop': "笔记本电脑",
        'QR': "2维码",
    }
    fallback_name = "单号"

    net_size = (self.model_image_size[0], self.model_image_size[1])
    img_h, img_w = np.shape(image)[0:2]
    image_shape = np.array([img_h, img_w])

    letterboxed = np.array(letterbox_image(image, net_size))
    chw = np.array(letterboxed, dtype=np.float32)
    chw /= 255.0
    chw = np.transpose(chw, (2, 0, 1)).astype(np.float32)
    batch = np.asarray([chw])

    with torch.no_grad():
        batch = torch.from_numpy(batch)
        if self.cuda:
            batch = batch.cuda()
        raw_outputs = self.net(batch)

        decoded = [self.yolo_decodes[k](raw_outputs[k]) for k in range(3)]
        detections = non_max_suppression(
            torch.cat(decoded, 1), len(self.class_names),
            conf_thres=self.confidence, nms_thres=0.3)

    # NOTE: the catch-all handler is kept deliberately to preserve behaviour.
    try:
        detections = detections[0].cpu().numpy()
    except:
        return image

    combined = detections[:, 4] * detections[:, 5]
    keep = combined > self.confidence
    top_conf = combined[keep]
    top_label = np.array(detections[keep, -1], np.int32)
    kept_boxes = np.array(detections[keep, :4])
    x_min, y_min, x_max, y_max = (
        np.expand_dims(kept_boxes[:, k], -1) for k in range(4))

    # Remove the gray bars.
    boxes = yolo_correct_boxes(
        y_min, x_min, y_max, x_max, np.array(net_size), image_shape)

    font_size = np.floor(3e-2 * img_w + 0.5).astype('int32')
    font = ImageFont.truetype(font='model_data/simhei.ttf', size=font_size)
    thickness = (img_h + img_w) // self.model_image_size[0]

    for idx, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[idx]

        top, left, bottom, right = boxes[idx]
        top, left, bottom, right = top - 5, left - 5, bottom + 5, right + 5

        # Crop the padded (not yet clamped) box and save it to disk.
        img2 = image.crop((left, top, right, bottom))
        img2.save("lena2.jpg")

        top = max(0, np.floor(top).astype('int32'))
        left = max(0, np.floor(left).astype('int32'))
        bottom = min(img_h, np.floor(bottom).astype('int32'))
        right = min(img_w, np.floor(right).astype('int32'))

        # Draw the box.
        predicted_class_ch = display_names.get(predicted_class, fallback_name)
        text = '{} {} {:.2f} {}'.format(
            predicted_class_ch, '置信度', score, '%')
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(text, font)
        encoded = text.encode('utf-8')
        print(encoded)

        # Label goes above the box when it fits, otherwise just inside it.
        fits_above = top - label_size[1] >= 0
        text_origin = np.array(
            [left, top - label_size[1]] if fits_above else [left, top + 3])

        # Box outline.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=self.colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[c])
        draw.text(text_origin, str(encoded, 'UTF-8'),
                  fill=(0, 0, 0), font=font)
        del draw
    return image
def generate_box(self, image):
    """Detect objects in ``image`` and return their boxes as strings.

    Each kept detection's box is padded by 5 pixels on every side, rounded
    and clamped to the image bounds.

    Args:
        image: Input image; passed to ``letterbox_image`` and ``np.shape``.

    Returns:
        List of ``"left,top,right,bottom"`` strings, one per detection. It
        is empty if the first entry of the ``non_max_suppression`` result
        cannot be converted to a NumPy array.
    """
    model_size = (self.model_image_size[0], self.model_image_size[1])
    img_h, img_w = np.shape(image)[0:2]
    image_shape = np.array([img_h, img_w])

    crop_img = np.array(letterbox_image(image, model_size))
    photo = np.array(crop_img, dtype=np.float32)
    photo /= 255.0
    photo = np.transpose(photo, (2, 0, 1))
    photo = photo.astype(np.float32)
    images = torch.from_numpy(np.asarray([photo]))
    if self.cuda:
        images = images.cuda()

    with torch.no_grad():
        outputs = self.net(images)
        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(
            output, self.config["yolo"]["classes"],
            conf_thres=self.confidence, nms_thres=0.3)

    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except Exception:
        # Best-effort: nothing usable was detected.
        return []

    top_index = (batch_detections[:, 4] * batch_detections[:, 5]
                 > self.confidence)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

    # Remove the gray bars.
    boxes = yolo_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array(model_size), image_shape)

    # No font or line thickness is needed: nothing is drawn here, so the
    # former TrueType load (and its dependency on the font file) is gone.
    boxlist = []
    for top, left, bottom, right in boxes:
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))
        boxlist.append(','.join(str(v) for v in (left, top, right, bottom)))
    return boxlist
def detect_image(self, image):
    """Detect objects in ``image`` and return them as a list of records.

    Each kept detection's box is padded by 5 pixels on every side, rounded
    and clamped to the image bounds.

    Args:
        image: Input image; passed to ``letterbox_image`` and ``np.shape``.

    Returns:
        List of ``[left, top, right, bottom, score, class_name]`` entries.
        It is empty if extracting the first ``non_max_suppression`` entry
        raises.
    """
    net_size = (self.model_image_size[0], self.model_image_size[1])
    img_h, img_w = np.shape(image)[0:2]
    image_shape = np.array([img_h, img_w])

    letterboxed = np.array(letterbox_image(image, net_size))
    chw = np.array(letterboxed, dtype=np.float32)
    chw /= 255.0  # normalise
    chw = np.transpose(chw, (2, 0, 1)).astype(np.float32)
    batch = np.asarray([chw])

    with torch.no_grad():
        batch = torch.from_numpy(batch)
        if self.cuda:
            batch = batch.cuda()
        raw_outputs = self.net(batch)

        decoded = [self.yolo_decodes[k](raw_outputs[k]) for k in range(3)]
        detections = non_max_suppression(
            torch.cat(decoded, 1), len(self.class_names),
            conf_thres=self.confidence, nms_thres=0.3)

    # NOTE: the catch-all handler is kept deliberately to preserve behaviour.
    try:
        detections = detections[0].cpu().numpy()
    except:
        return []

    combined = detections[:, 4] * detections[:, 5]
    keep = combined > self.confidence
    top_conf = combined[keep]
    top_label = np.array(detections[keep, -1], np.int32)
    kept_boxes = np.array(detections[keep, :4])
    x_min, y_min, x_max, y_max = (
        np.expand_dims(kept_boxes[:, k], -1) for k in range(4))

    boxes = yolo_correct_boxes(
        y_min, x_min, y_max, x_max, np.array(net_size), image_shape)

    results = []
    for idx, class_id in enumerate(top_label):
        predicted_class = self.class_names[class_id]
        score = top_conf[idx]

        top, left, bottom, right = boxes[idx]
        top, left, bottom, right = top - 5, left - 5, bottom + 5, right + 5
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(img_h, np.floor(bottom + 0.5).astype('int32'))
        right = min(img_w, np.floor(right + 0.5).astype('int32'))

        results.append([left, top, right, bottom, score, predicted_class])
    return results
def detect_image(self, image):
    """Detect objects, draw them on ``image`` and return the boxes as well.

    Args:
        image: Input image accepted by ``letterbox_image``, ``np.shape`` and
            ``ImageDraw.Draw`` (presumably a PIL image -- confirm).

    Returns:
        ``(image, boxes)``. ``image`` is the input object with boxes and
        labels drawn onto it. ``boxes`` is the value returned by
        ``yolo_correct_boxes``; each row unpacks as
        ``top, left, bottom, right``. When nothing is detected, ``boxes`` is
        an empty ``(0, 4)`` array so callers can always unpack two values.
    """
    # Read the image size once instead of calling np.shape(image) per box.
    image_height, image_width = np.shape(image)[:2]
    image_shape = np.array([image_height, image_width])
    dim0, dim1 = self.model_image_size[0], self.model_image_size[1]

    # Letterbox-resize (adds padding so the image is not distorted), scale
    # pixel values by 1/255 and move the last axis to the front.
    crop_img = np.array(letterbox_image(image, (dim1, dim0)))
    photo = crop_img.astype(np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))

    with torch.no_grad():
        # Wrapping in a list adds the batch axis.
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()
        outputs = self.net(images)
        # Each network output has its own decoder.
        decoded = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        # Stack all predictions, then run non-maximum suppression.
        output = torch.cat(decoded, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)

    # The first entry cannot be converted when nothing was detected
    # (presumably it is None -- confirm against non_max_suppression).
    # Other errors propagate instead of being silently swallowed.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        # Same two-value shape as the normal return path.
        return image, np.empty((0, 4))

    # Score = column 4 * column 5; keep rows above the confidence threshold.
    scores = batch_detections[:, 4] * batch_detections[:, 5]
    keep = scores > self.confidence
    top_conf = scores[keep]
    top_label = np.array(batch_detections[keep, -1], np.int32)
    top_bboxes = np.array(batch_detections[keep, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

    # The predicted boxes are relative to the letterboxed image; map them
    # back to the original image by removing the padding.
    boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                               np.array([dim0, dim1]), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * image_width + 0.5).astype('int32'))
    thickness = max((image_height + image_width) // dim0, 1)

    draw = ImageDraw.Draw(image)
    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        color = self.colors[self.class_names.index(predicted_class)]

        top, left, bottom, right = boxes[i]
        # Grow the box by 5 px per side, round to nearest, clip to the image.
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(image_height,
                     np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(image_width,
                    np.floor(right + 5 + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, top_conf[i])
        # NOTE(review): ImageDraw.textsize was removed in Pillow 10; switch
        # to textbbox if the project upgrades Pillow.
        label_size = draw.textsize(label, font)
        print(label.encode('utf-8'), top, left, bottom, right)

        # Put the label above the box when there is room, otherwise inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Nested 1 px rectangles give an outline `thickness` pixels wide.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)

    return image, boxes
def detect_image(self, image):
    """Detect objects in ``image`` and draw boxes and labels onto it.

    Args:
        image: Input image accepted by ``letterbox_image``, ``np.shape`` and
            ``ImageDraw.Draw`` (presumably a PIL image -- confirm).

    Returns:
        The same ``image`` object, annotated when there are detections and
        untouched otherwise.
    """
    # Read the image size once instead of calling np.shape(image) per box.
    image_height, image_width = np.shape(image)[:2]
    image_shape = np.array([image_height, image_width])
    dim0, dim1 = self.model_image_size[0], self.model_image_size[1]

    # Letterbox-resize, scale pixel values by 1/255 and move the last axis
    # to the front; wrapping in a list adds the batch axis.
    crop_img = np.array(letterbox_image(image, (dim1, dim0)))
    photo = crop_img.astype(np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))
    images = torch.from_numpy(np.asarray([photo]))
    if self.cuda:
        images = images.cuda()

    with torch.no_grad():
        outputs = self.net(images)
        # Decode each of the three network outputs with its own decoder.
        decoded = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(decoded, 1)
        batch_detections = non_max_suppression(
            output,
            self.config["yolo"]["classes"],
            conf_thres=self.confidence,
            nms_thres=self.iou)

    # The first entry cannot be converted when nothing was detected
    # (presumably it is None -- confirm against non_max_suppression).
    # Other errors propagate instead of being silently swallowed.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        return image

    # Score = column 4 * column 5; keep rows above the confidence threshold.
    scores = batch_detections[:, 4] * batch_detections[:, 5]
    keep = scores > self.confidence
    top_conf = scores[keep]
    top_label = np.array(batch_detections[keep, -1], np.int32)
    top_bboxes = np.array(batch_detections[keep, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

    # Remove the letterbox padding so boxes refer to the original image.
    boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                               np.array([dim0, dim1]), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * image_width + 0.5).astype('int32'))
    # Clamp to 1 so small images (height + width below the network size)
    # still get a visible outline instead of none at all.
    thickness = max((image_height + image_width) // dim0, 1)

    draw = ImageDraw.Draw(image)
    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        color = self.colors[self.class_names.index(predicted_class)]

        top, left, bottom, right = boxes[i]
        # Grow the box by 5 px per side, round to nearest, clip to the image.
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(image_height,
                     np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(image_width,
                    np.floor(right + 5 + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, top_conf[i])
        # NOTE(review): ImageDraw.textsize was removed in Pillow 10; switch
        # to textbbox if the project upgrades Pillow.
        label_size = draw.textsize(label, font)
        print(label.encode('utf-8'))

        # Put the label above the box when there is room, otherwise inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Nested 1 px rectangles give an outline `thickness` pixels wide.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)

    return image
def detect_image(self, image):
    """Run the detector and summarise the detections via ``self.calsquare``.

    Args:
        image: Input image accepted by ``letterbox_image`` and ``np.shape``
            (presumably a PIL image -- confirm against callers).

    Returns:
        ``(image, predict)``. ``predict`` starts as ``np.zeros(12)`` and is
        replaced by the result of ``self.calsquare(image, boxes, top_label,
        predict)`` when there are detections; it stays all-zero otherwise.
    """
    predict = np.zeros(12)
    image_shape = np.array(np.shape(image)[0:2])
    net_size = (self.model_image_size[0], self.model_image_size[1])

    # Letterbox-resize, scale pixel values by 1/255 and move the last axis
    # to the front before adding the batch axis.
    crop_img = np.array(letterbox_image(image, net_size))
    photo = crop_img.astype(np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))

    with torch.no_grad():
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()
        outputs = self.net(images)
        # One decoder per network output.
        decoded = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(decoded, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)

    # The first entry cannot be converted when nothing was detected
    # (presumably it is None -- confirm against non_max_suppression).
    # Other errors propagate instead of being silently swallowed.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        return image, predict

    # Score = column 4 * column 5; keep rows above the confidence threshold.
    keep = (batch_detections[:, 4] * batch_detections[:, 5]
            > self.confidence)
    top_label = np.array(batch_detections[keep, -1], np.int32)
    top_bboxes = np.array(batch_detections[keep, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

    # Remove the letterbox padding so boxes refer to the original image.
    boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                               np.array(net_size), image_shape)

    # Nothing is drawn here, so the font, line thickness and per-box scores
    # that the original computed but never used are no longer built.
    predict = self.calsquare(image, boxes, top_label, predict)
    return image, predict
def detect_image(self, image):
    """Detect objects in ``image`` and draw boxes and labels onto it.

    Args:
        image: Input image accepted by ``letterbox_image``, ``np.shape`` and
            ``ImageDraw.Draw`` (presumably a PIL image -- confirm).

    Returns:
        The same ``image`` object, annotated when there are detections and
        untouched otherwise.
    """
    # Read the image size once instead of calling np.shape(image) per box.
    image_height, image_width = np.shape(image)[:2]
    image_shape = np.array([image_height, image_width])
    net_size = (self.model_image_size[0], self.model_image_size[1])

    crop_img = np.array(letterbox_image(image, net_size))
    # Convert to float32 and squeeze pixel values into the 0-1 range.
    photo = crop_img.astype(np.float32) / 255.0
    # The image is read as H*W*C but the network expects C*H*W.
    photo = np.transpose(photo, (2, 0, 1))

    with torch.no_grad():
        # Wrapping in a list adds the batch axis before the tensor is built.
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()
        outputs = self.net(images)
        # The i-th decoder handles the i-th network output.
        decoded = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        # Concatenate the predictions of all three outputs along axis 1.
        output = torch.cat(decoded, 1)
        # Non-maximum suppression removes duplicate boxes; rows are
        # (x1, y1, x2, y2, obj_conf, class_conf, class_pred).
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)

    # The first entry cannot be converted when nothing was detected
    # (presumably it is None -- confirm against non_max_suppression).
    # Other errors propagate instead of being silently swallowed.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        return image

    # Second score filter, kept from the original even though
    # non_max_suppression is also given conf_thres.
    scores = batch_detections[:, 4] * batch_detections[:, 5]
    keep = scores > self.confidence
    top_score = scores[keep]
    top_label = np.array(batch_detections[keep, -1], np.int32)
    top_bboxes = np.array(batch_detections[keep, :4])
    # Split (x1, y1, x2, y2) into four n*1 columns, n being the box count.
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

    # Remove the letterbox padding; rows come back as
    # (top, left, bottom, right) on the original image.
    boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                               np.array(net_size), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * image_width + 0.5).astype('int32'))
    # Outline width of the box, scaled with the image size.
    thickness = int(
        max(np.ceil(image_height / net_size[0]),
            np.ceil(image_width / net_size[0]))) + 1

    draw = ImageDraw.Draw(image)
    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        color = self.colors[self.class_names.index(predicted_class)]

        top, left, bottom, right = boxes[i]
        # Round up and clip to the image bounds.
        top = max(0, np.ceil(top).astype('int32'))
        left = max(0, np.ceil(left).astype('int32'))
        bottom = min(image_height, np.ceil(bottom).astype('int32'))
        right = min(image_width, np.ceil(right).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, top_score[i])
        # Size needed to render the label with this font.
        # NOTE(review): ImageDraw.textsize was removed in Pillow 10; switch
        # to textbbox if the project upgrades Pillow.
        label_size = draw.textsize(label, font)

        if top - label_size[1] >= 0:
            # Room above the box: put the label outside, at its top-left.
            text_origin = np.array([left, top - label_size[1]])
        else:
            # No room above: put the label just inside the top-left corner.
            text_origin = np.array([left + 1, top + 1])

        # Hollow rectangle for the predicted box.
        draw.rectangle([left, top, right, bottom],
                       outline=color,
                       width=thickness)
        # Filled rectangle behind the label, then the label in black.
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)

    return image
def detect_image(self, image):
    """Run the detector and return a box plus three angles per detection.

    Args:
        image: Input image accepted by ``letterbox_image`` and ``np.shape``
            (presumably a PIL image -- confirm against callers).

    Returns:
        A list of dicts, one per detection whose column-4 score exceeds
        ``self.confidence``:
        ``{"box": [left, top, right, bottom], "angle": [yaw, pitch, roll]}``.
        An empty list is returned when nothing is detected.
    """
    # Read the image size once instead of calling np.shape(image) per box.
    image_height, image_width = np.shape(image)[:2]
    image_shape = np.array([image_height, image_width])
    net_size = (self.model_image_size[0], self.model_image_size[1])

    # Letterbox-resize, scale pixel values by 1/255 and move the last axis
    # to the front; wrapping in a list adds the batch axis.
    crop_img = np.array(letterbox_image(image, net_size))
    photo = crop_img.astype(np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))
    images = torch.from_numpy(np.asarray([photo]))
    if self.cuda:
        images = images.cuda()

    with torch.no_grad():
        outputs = self.net(images)
        # One decoder per network output.
        decoded = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(decoded, 1)
        batch_detections = non_max_suppression(
            output,
            self.config["yolo"]["classes"],
            conf_thres=self.confidence,
            nms_thres=self.iou)

    # The first entry cannot be used when nothing was detected (presumably
    # it is None -- confirm against non_max_suppression). The shape log sits
    # inside the guard so an empty result no longer crashes on `.shape`,
    # and the empty case returns a list like the normal path does.
    try:
        first_detections = batch_detections[0]
        print(f'[INFO] batch_detections: {first_detections.shape}')
        batch_detections = first_detections.cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        return []

    # Column 4 is the score that is thresholded; columns 5..7 hold the
    # three angle values.
    keep = batch_detections[:, 4] > self.confidence
    top_angle = batch_detections[keep, 5:8]
    top_bboxes = np.array(batch_detections[keep, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

    # Remove the letterbox padding so boxes refer to the original image.
    boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                               np.array(net_size), image_shape)

    predictions = []
    for i, angles in enumerate(top_angle):
        top, left, bottom, right = boxes[i]
        # Angles are scaled by 90 (presumably normalised values converted
        # to degrees -- TODO confirm).
        yaw, pitch, roll = angles * 90

        # Grow the box by 5 px per side, round to nearest, clip to the image.
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(image_height,
                     np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(image_width,
                    np.floor(right + 5 + 0.5).astype('int32'))

        # The original subscripted infor["angle"] instead of assigning it,
        # which raised KeyError on the first detection.
        predictions.append({
            "box": [left, top, right, bottom],
            "angle": [yaw, pitch, roll],
        })

    return predictions
def detect_image(self, image):
    """Detect objects in ``image`` and draw boxes and labels onto it.

    Args:
        image: Input image accepted by ``letterbox_image``, ``np.shape`` and
            ``ImageDraw.Draw`` (presumably a PIL image -- confirm).

    Returns:
        The same ``image`` object, annotated when there are detections and
        untouched otherwise.
    """
    # Read the image size once instead of calling np.shape(image) per box.
    image_height, image_width = np.shape(image)[:2]
    image_shape = np.array([image_height, image_width])
    net_size = (self.model_image_size[0], self.model_image_size[1])

    # Resize with letterbox padding, normalise by 1/255, and move the last
    # axis to the front because PyTorch expects channels first.
    crop_img = np.array(letterbox_image(image, net_size))
    photo = crop_img.astype(np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))
    # Wrapping in a list adds the batch axis; then convert to a tensor.
    images = torch.from_numpy(np.asarray([photo]))
    if self.cuda:
        images = images.cuda()

    with torch.no_grad():
        outputs = self.net(images)
        # The network has three outputs; decode each with its own decoder.
        decoded = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        # Stack the predictions, then run non-maximum suppression.
        output = torch.cat(decoded, 1)
        batch_detections = non_max_suppression(
            output,
            self.config["yolo"]["classes"],
            conf_thres=self.confidence,
            nms_thres=0.3)

    # The first entry cannot be converted when nothing was detected
    # (presumably it is None -- confirm against non_max_suppression).
    # Other errors propagate instead of being silently swallowed.
    try:
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        return image

    # Box confidence times class confidence is the score that is
    # thresholded; keep only the rows that pass.
    scores = batch_detections[:, 4] * batch_detections[:, 5]
    keep = scores > self.confidence
    top_conf = scores[keep]
    top_label = np.array(batch_detections[keep, -1], np.int32)
    top_bboxes = np.array(batch_detections[keep, :4])
    top_xmin, top_ymin, top_xmax, top_ymax = (
        np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

    # Boxes are relative to the top-left of the padded image;
    # yolo_correct_boxes converts them to original-image coordinates.
    boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                               np.array(net_size), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * image_width + 0.5).astype('int32'))
    # Clamp to 1 so small images (height + width below the network size)
    # still get a visible outline instead of none at all.
    thickness = max((image_height + image_width) // net_size[0], 1)

    draw = ImageDraw.Draw(image)
    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        color = self.colors[self.class_names.index(predicted_class)]

        top, left, bottom, right = boxes[i]
        # Grow the box by 5 px per side, round to nearest, clip to the image.
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(image_height,
                     np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(image_width,
                    np.floor(right + 5 + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, top_conf[i])
        # NOTE(review): ImageDraw.textsize was removed in Pillow 10; switch
        # to textbbox if the project upgrades Pillow.
        label_size = draw.textsize(label, font)
        print(label.encode('utf-8'))

        # Put the label above the box when there is room, otherwise inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Nested 1 px rectangles give an outline `thickness` pixels wide.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        # Write the label on top of the filled rectangle.
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)

    return image