def image_test(self, path, inputs=None, oimg=None, confidence=0.4):
    """Run the model on one image and return boxes in pixel coordinates.

    Two call modes are supported:

    * ``path`` is given: the image is read with OpenCV, resized to
      ``self.input_shape``, converted BGR -> RGB and preprocessed here.
    * ``path`` is ``None``: the caller passes the ready-made network input
      as ``inputs`` and the image it was derived from as ``oimg`` (only the
      shape of ``oimg`` is used, to scale the boxes).

    Args:
        path: Image file path, or ``None`` to use ``inputs``/``oimg``.
        inputs: Preprocessed network input, used when ``path`` is ``None``.
        oimg: Source image matching ``inputs``, used when ``path`` is ``None``.
        confidence: Detections whose score (column 1 of a result row) is
            below this value are dropped. Defaults to the former
            hard-coded 0.4.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax, score]`` entries for the first
        image of the batch, or ``None`` when ``detection_out`` returns an
        empty result.

    Raises:
        FileNotFoundError: ``path`` was given but OpenCV could not read it.
        ValueError: ``path`` is ``None`` and ``inputs`` or ``oimg`` is missing.
    """
    bbox_util = BBoxUtility(2)

    if path is not None:
        img = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError("Could not read image: {}".format(path))
        height, width = np.shape(img)[0], np.shape(img)[1]
        img = cv2.resize(img, (self.input_shape[0], self.input_shape[1]))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        inputs = image.img_to_array(img)
        inputs = preprocess_input(np.array([inputs]))
    else:
        if inputs is None or oimg is None:
            raise ValueError(
                "inputs and oimg are both required when path is None")
        height, width = np.shape(oimg)[0], np.shape(oimg)[1]

    preds = self.model.predict(inputs, batch_size=1, verbose=1)
    results = bbox_util.detection_out(preds)
    print(results)

    if len(results) > 0:
        final = []
        for each in results[0]:
            if each[1] < confidence:
                continue
            # Columns 2..5 are scaled by the source image width/height,
            # so they are presumably normalised box corners.
            xmin = int(each[2] * width)
            ymin = int(each[3] * height)
            xmax = int(each[4] * width)
            ymax = int(each[5] * height)
            final.append([xmin, ymin, xmax, ymax, each[1]])
        return final
    return None
def __init__(self, **kwargs):
    """Set up the detector from the class defaults plus keyword overrides.

    Args:
        **kwargs: Attribute overrides applied on top of ``self._defaults``
            before anything else is initialised.
    """
    self.__dict__.update(self._defaults)
    # Overrides used to be accepted and then silently dropped.
    for name, value in kwargs.items():
        setattr(self, name, value)
    self.class_names = self._get_class()
    self.sess = K.get_session()
    self.config = Config()
    self.generate()
    self.bbox_util = BBoxUtility()
def __init__(self, **kwargs):
    """Set up the detector from the class defaults plus keyword overrides.

    Args:
        **kwargs: Attribute overrides applied on top of ``self._defaults``
            before anything else is initialised.
    """
    self.__dict__.update(self._defaults)
    # Overrides used to be accepted and then silently dropped.
    for name, value in kwargs.items():
        setattr(self, name, value)
    self.class_names = self._get_class()
    self.sess = K.get_session()
    self.generate()
    self.prior = self._get_prior()
    # self.num_classes is not assigned in this method; presumably
    # generate() sets it -- confirm against that method.
    self.bbox_util = BBoxUtility(self.num_classes)
def __init__(self, **kwargs):
    """Set up the detector from the class defaults plus keyword overrides.

    Args:
        **kwargs: Attribute overrides applied on top of ``self._defaults``
            before anything else is initialised (so an ``iou`` override
            reaches the ``BBoxUtility`` built below).
    """
    self.__dict__.update(self._defaults)
    # Overrides used to be accepted and then silently dropped.
    for name, value in kwargs.items():
        setattr(self, name, value)
    self.class_names = self._get_class()
    self.sess = K.get_session()
    self.config = Config()
    self.generate()
    self.bbox_util = BBoxUtility(classifier_nms=self.iou,
                                 top_k=self.config.num_RPN_predict_pre)
def __init__(self, **kwargs):
    """Set up the detector from the class defaults plus keyword overrides.

    Args:
        **kwargs: Attribute overrides applied on top of ``self._defaults``
            before the backbone configuration is selected.
    """
    self.__dict__.update(self._defaults)
    # Overrides used to be accepted and then silently dropped.
    for name, value in kwargs.items():
        setattr(self, name, value)
    # Any backbone other than "mobilenet" falls back to the cfg_re50 config.
    if self.backbone == "mobilenet":
        self.cfg = cfg_mnet
    else:
        self.cfg = cfg_re50
    self.bbox_util = BBoxUtility()
    self.generate()
def __init__(self, **kwargs):
    """Set up the detector from the class defaults plus keyword overrides.

    Args:
        **kwargs: Attribute overrides applied on top of ``self._defaults``
            before anything else is initialised (so a ``phi`` or ``iou``
            override is honoured below).
    """
    self.__dict__.update(self._defaults)
    # Overrides used to be accepted and then silently dropped.
    for name, value in kwargs.items():
        setattr(self, name, value)
    self.class_names = self._get_class()
    # Square, 3-channel input whose side is looked up by the phi setting.
    side = image_sizes[self.phi]
    self.model_image_size = [side, side, 3]
    self.generate()
    # self.num_classes is not assigned in this method; presumably
    # generate() sets it -- confirm against that method.
    self.bbox_util = BBoxUtility(self.num_classes, nms_thresh=self.iou)
    self.prior = self._get_prior()
def __init__(self, **kwargs):
    """Set up the detector from the class defaults plus keyword overrides.

    Args:
        **kwargs: Attribute overrides applied on top of ``self._defaults``
            before the backbone configuration, NMS threshold and anchors
            are derived from them.
    """
    self.__dict__.update(self._defaults)
    # Overrides used to be accepted and then silently dropped.
    for name, value in kwargs.items():
        setattr(self, name, value)
    # Any backbone other than "mobilenet" falls back to the cfg_re50 config.
    if self.backbone == "mobilenet":
        self.cfg = cfg_mnet
    else:
        self.cfg = cfg_re50
    self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
    self.generate()
    self.anchors = Anchors(
        self.cfg,
        image_size=(self.input_shape[0], self.input_shape[1]),
    ).get_anchors()
def __init__(self, **kwargs):
    """Initialise from ``_defaults``, then apply keyword overrides.

    After the attributes are in place this selects the backbone config,
    builds the box utility and the anchors for ``input_shape``, and finally
    calls ``generate()``.
    """
    vars(self).update(self._defaults)
    for key in kwargs:
        setattr(self, key, kwargs[key])

    # ---------------------------------------------------#
    #   Config for the selected backbone network
    # ---------------------------------------------------#
    self.cfg = cfg_mnet if self.backbone == "mobilenet" else cfg_re50

    # ---------------------------------------------------#
    #   Box utility and prior-box (anchor) generation
    # ---------------------------------------------------#
    self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
    image_size = (self.input_shape[0], self.input_shape[1])
    anchor_source = Anchors(self.cfg, image_size=image_size)
    self.anchors = anchor_source.get_anchors()

    self.generate()
callback.writer.add_summary(summary, batch_no) callback.writer.flush() #----------------------------------------------------# # 检测精度mAP和pr曲线计算参考视频 # https://www.bilibili.com/video/BV1zE411u7Vw #----------------------------------------------------# if __name__ == "__main__": config = Config() NUM_CLASSES = 21 # 训练100世代 EPOCH = 100 # 开始使用1e-4训练,每过10个世代降低为原来的1/2 Learning_rate = 1e-4 bbox_util = BBoxUtility(overlap_threshold=config.rpn_max_overlap, ignore_threshold=config.rpn_min_overlap) annotation_path = '2007_train.txt' #------------------------------------------------------# # 权值文件请看README,百度网盘下载 # 训练自己的数据集时提示维度不匹配正常 # 预测的东西都不一样了自然维度不匹配 #------------------------------------------------------# model_rpn, model_classifier, model_all = get_model(config, NUM_CLASSES) base_net_weights = "model_data/voc_weights.h5" model_all.summary() model_rpn.load_weights(base_net_weights, by_name=True) model_classifier.load_weights(base_net_weights, by_name=True) with open(annotation_path) as f:
class SSD(object):
    """SSD300 predictor that draws labelled detections onto an image.

    Settings come from ``_defaults`` and can be overridden per instance
    through keyword arguments to the constructor.
    """

    _defaults = {
        "model_path": 'model_data/ssd_weights.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "model_image_size": (300, 300, 3),
        "confidence": 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or a message string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise SSD
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply defaults and overrides, then load classes and the model.

        Args:
            **kwargs: Attribute overrides applied on top of ``_defaults``.
        """
        self.__dict__.update(self._defaults)
        # Overrides used to be accepted and then silently dropped.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.generate()
        self.bbox_util = BBoxUtility(self.num_classes)

    #---------------------------------------------------#
    #   Read the class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    #---------------------------------------------------#
    #   Build the model, load weights, pick colours
    #---------------------------------------------------#
    def generate(self):
        """Build SSD300, load its weights and assign one colour per class.

        Raises:
            AssertionError: ``model_path`` does not end with ``.h5``.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # One extra class on top of the names read from classes_path.
        self.num_classes = len(self.class_names) + 1

        self.ssd_model = ssd.SSD300(self.model_image_size, self.num_classes)
        # Load from the expanded path that was just validated; the raw
        # attribute may still contain "~".
        self.ssd_model.load_weights(model_path, by_name=True)
        self.ssd_model.summary()
        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Evenly spaced hues, converted to 0-255 RGB tuples.
        class_count = len(self.class_names)
        self.colors = []
        for x in range(class_count):
            rgb = colorsys.hsv_to_rgb(x / class_count, 1., 1.)
            self.colors.append(
                (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect objects in ``image`` and draw them onto it.

        Args:
            image: Image accepted by ``letterbox_image`` and
                ``ImageDraw.Draw`` (presumably a PIL image).

        Returns:
            The same ``image`` object, with boxes and labels drawn in place
            when there are detections.
        """
        image_shape = np.array(np.shape(image)[0:2])
        net_size = (self.model_image_size[0], self.model_image_size[1])
        crop_img, _, _ = letterbox_image(image, net_size)
        photo = np.array(crop_img, dtype=np.float64)

        # Preprocess / normalise the network input.
        photo = preprocess_input(
            np.reshape(photo, [1, net_size[0], net_size[1], 3]))
        preds = self.ssd_model.predict(photo)

        # Decode the raw predictions.
        results = self.bbox_util.detection_out(
            preds, confidence_threshold=self.confidence)
        if len(results[0]) <= 0:
            return image

        # Keep only rows whose score reaches the confidence setting.
        detections = results[0]
        det_label = detections[:, 0]
        det_conf = detections[:, 1]
        keep = det_conf >= self.confidence
        top_conf = det_conf[keep]
        top_label_indices = det_label[keep].tolist()
        top_xmin = np.expand_dims(detections[keep, 2], -1)
        top_ymin = np.expand_dims(detections[keep, 3], -1)
        top_xmax = np.expand_dims(detections[keep, 4], -1)
        top_ymax = np.expand_dims(detections[keep, 5], -1)

        # Undo the letterbox padding (remove the grey bars).
        boxes = ssd_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                  np.array(net_size), image_shape)

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
        # At least one outline pass, otherwise small images get no box.
        thickness = max(
            (np.shape(image)[0] + np.shape(image)[1])
            // self.model_image_size[0], 1)

        for i, c in enumerate(top_label_indices):
            # Label 0 is skipped: class_names is indexed with label - 1.
            class_index = int(c) - 1
            predicted_class = self.class_names[class_index]
            score = top_conf[i]

            # Grow the box by 5 px per side, then clamp to the image.
            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            # Console output stays UTF-8 bytes, as before.
            print(label.encode('utf-8'))

            # Put the label above the box when it fits, else just inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Separate loop variable so the detection index is not clobbered.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=self.colors[class_index])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[class_index])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        return image

    def close_session(self):
        """Close the backend session obtained in the constructor."""
        self.sess.close()
class FRCNN(object):
    """Faster R-CNN predictor that draws labelled detections onto an image.

    Settings come from ``_defaults`` and can be overridden per instance
    through keyword arguments to the constructor.
    """

    _defaults = {
        "model_path": '/home/gxt/study/faster-rcnn-keras-master/logs/Epoch100-Total_Loss0.8886-Val_Loss1.0822.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
        "iou": 0.3
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or a message string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise Faster R-CNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply defaults and overrides, then load classes and the models.

        Args:
            **kwargs: Attribute overrides applied on top of ``_defaults``.
        """
        self.__dict__.update(self._defaults)
        # Overrides used to be accepted and then silently dropped.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.config = Config()
        self.generate()
        self.bbox_util = BBoxUtility(classifier_nms=self.iou,
                                     top_k=self.config.num_RPN_predict_pre)

    #---------------------------------------------------#
    #   Read the class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    #---------------------------------------------------#
    #   Load the models
    #---------------------------------------------------#
    def generate(self):
        """Build the RPN and classifier models, load weights, pick colours.

        Raises:
            AssertionError: ``model_path`` does not end with ``.h5``.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # One extra class on top of the names read from classes_path.
        self.num_classes = len(self.class_names) + 1

        self.model_rpn, self.model_classifier = frcnn.get_predict_model(
            self.config, self.num_classes)
        # Load from the expanded path that was just validated; the raw
        # attribute may still contain "~".
        self.model_rpn.load_weights(model_path, by_name=True)
        self.model_classifier.load_weights(model_path, by_name=True)
        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Evenly spaced hues, converted to 0-255 RGB tuples.
        class_count = len(self.class_names)
        self.colors = []
        for x in range(class_count):
            rgb = colorsys.hsv_to_rgb(x / class_count, 1., 1.)
            self.colors.append(
                (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)))

    #---------------------------------------------------#
    #   Size of the shared feature layer
    #---------------------------------------------------#
    def get_img_output_length(self, width, height):
        """Return ``(width, height)`` after four stride-2 reductions.

        Each step applies ``(n + 2 * pad - kernel) // 2 + 1`` with
        kernel sizes 7, 3, 1, 1 and paddings 3, 1, 0, 0.
        """
        def get_output_length(input_length):
            filter_sizes = [7, 3, 1, 1]
            padding = [3, 1, 0, 0]
            stride = 2
            for kernel, pad in zip(filter_sizes, padding):
                input_length = (input_length + 2 * pad - kernel) // stride + 1
            return input_length

        return get_output_length(width), get_output_length(height)

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect objects in ``image`` and return an annotated deep copy.

        Args:
            image: Image offering ``resize`` (called with
                ``Image.BICUBIC``), presumably a PIL image.

        Returns:
            A deep copy of the input, with boxes and labels drawn on it
            when there are detections.
        """
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        # Resize to the network working size given by get_new_img_size.
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)
        photo = np.array(image, dtype=np.float64)

        # Preprocess / normalise, then run the region proposal network.
        photo = preprocess_input(np.expand_dims(photo, 0))
        rpn_pred = self.model_rpn.predict(photo)

        # Decode the RPN output against the anchors.
        base_feature_width, base_feature_height = self.get_img_output_length(
            width, height)
        anchors = get_anchors([base_feature_width, base_feature_height],
                              width, height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        # Feed the shared feature layer and the proposals to the classifier.
        # The first column of each proposal is dropped and the coordinate
        # pairs are swapped for the ROI input.
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict(
            [base_layer, temp_ROIs])

        # Decode the classifier output into final boxes.
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)
        if len(results[0]) == 0:
            return old_image

        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]
        # Scale to the original image size.
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        # Font size is derived from the resized image width, as before.
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
        thickness = max(
            (np.shape(old_image)[0] + np.shape(old_image)[1])
            // old_width * 2, 1)

        image = old_image
        for i, c in enumerate(top_label_indices):
            class_index = int(c)
            predicted_class = self.class_names[class_index]
            score = top_conf[i]

            # Grow the box by 5 px per side, then clamp to the image.
            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            # Console output stays UTF-8 bytes, as before.
            print(label.encode('utf-8'), top, left, bottom, right)

            # Put the label above the box when it fits, else just inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Separate loop variable so the detection index is not clobbered.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=self.colors[class_index])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[class_index])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        return image

    def close_session(self):
        """Close the backend session obtained in the constructor."""
        self.sess.close()
class Retinaface(object):
    """RetinaFace predictor that draws boxes, scores and landmarks.

    Keep ``model_path`` and ``backbone`` consistent with each other: the
    weights must belong to the selected backbone.
    """

    _defaults = {
        "model_path": 'model_data/retinaface_mobilenet025.h5',
        "backbone": "mobilenet",
        "confidence": 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or a message string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply defaults and overrides, then build the model.

        Args:
            **kwargs: Attribute overrides applied on top of ``_defaults``.
        """
        self.__dict__.update(self._defaults)
        # Overrides used to be accepted and then silently dropped.
        for name, value in kwargs.items():
            setattr(self, name, value)
        # Any backbone other than "mobilenet" falls back to cfg_re50.
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.bbox_util = BBoxUtility()
        self.generate()

    #---------------------------------------------------#
    #   Build the model and load its weights
    #---------------------------------------------------#
    def generate(self):
        """Build the RetinaFace model and load weights from ``model_path``.

        Raises:
            AssertionError: ``model_path`` does not end with ``.h5``.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        self.retinaface = RetinaFace(self.cfg, self.backbone)
        # Load from the expanded path that was just validated; the raw
        # attribute may still contain "~".
        self.retinaface.load_weights(model_path, by_name=True)

    #---------------------------------------------------#
    #   Detect faces in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect faces in ``image`` and return an annotated copy.

        Args:
            image: Three-dimensional image array offering ``copy()`` and
                usable with the OpenCV drawing calls.

        Returns:
            A copy of the input with a box, score and five landmark dots
            drawn per detection (unchanged copy when nothing is found).
        """
        old_image = image.copy()
        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        # Multipliers applied to the box corners (4 values) and to the
        # five landmark points (10 values).
        scale = [im_width, im_height] * 2
        scale_for_landmarks = [im_width, im_height] * 5

        # Preprocess / normalise the network input.
        photo = np.expand_dims(preprocess_input(image), 0)
        anchors = Anchors(self.cfg,
                          image_size=(im_height, im_width)).get_anchors()
        preds = self.retinaface.predict(photo)

        # Decode the predictions and apply non-maximum suppression.
        results = self.bbox_util.detection_out(
            preds, anchors, confidence_threshold=self.confidence)
        if len(results) <= 0:
            return old_image

        # Row layout used below: 0-3 box, 4 score, 5-14 landmarks.
        results = np.array(results)
        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        landmark_colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255),
                           (0, 255, 0), (255, 0, 0)]
        for b in results:
            # Format the score before the row is truncated to integers.
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]),
                          (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

            # Landmarks: one coloured dot per point.
            for point, color in enumerate(landmark_colors):
                x_index = 5 + 2 * point
                cv2.circle(old_image, (b[x_index], b[x_index + 1]), 1,
                           color, 4)
        return old_image
def __init__(self, weight_path=None):
    """Set up the detector from the module-level ``config`` and a weight file.

    Args:
        weight_path: Forwarded unchanged to ``self._load_weigth``.
    """
    # Class list and input size are taken from the shared config object.
    self.classes = config.CLASSES
    self.input_shape = config.IMAGE_SIZE
    # Weights are loaded before the box utility is created.
    self._load_weigth(weight_path=weight_path)
    # The box utility is sized by the number of configured classes.
    self.bbox_util = BBoxUtility(len(self.classes))
class SSD(object):
    """SSD300 predictor (tf.keras) that draws and records detections.

    Settings come from ``_defaults`` and can be overridden per instance
    through keyword arguments to the constructor.
    """

    _defaults = {
        # This path can be changed to your own model directory.
        "model_path": 'logs/ep106-loss0.207-val_loss0.855.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "model_image_size": (300, 300, 3),
        "confidence": 0.4,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or a message string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise SSD
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply defaults and overrides, then load classes and the model.

        Args:
            **kwargs: Attribute overrides applied on top of ``_defaults``.
        """
        self.__dict__.update(self._defaults)
        # Overrides used to be accepted and then silently dropped.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.class_names = self._get_class()
        self.sess = get_session()
        self.generate()
        self.bbox_util = BBoxUtility(self.num_classes)

    #---------------------------------------------------#
    #   Read the class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    #---------------------------------------------------#
    #   Load the model and pick colours
    #---------------------------------------------------#
    def generate(self):
        """Load the model (full model file or weights only) and set colours.

        Raises:
            AssertionError: ``model_path`` does not end with ``.h5``, or the
                consistency check on a directly loaded model fails.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # One extra class on top of the names read from classes_path.
        self.num_classes = len(self.class_names) + 1

        # If the file contains the architecture, load it directly;
        # otherwise build the network first and load the weights into it.
        try:
            self.ssd_model = tf.keras.models.load_model(model_path,
                                                        compile=False)
        except Exception:
            # Not a bare except: Ctrl-C / SystemExit must still propagate.
            self.ssd_model = ssd.SSD300(self.model_image_size,
                                        self.num_classes)
            # Use the expanded path that was validated above.
            self.ssd_model.load_weights(model_path, by_name=True)
        else:
            # NOTE(review): this check only runs for directly loaded
            # models; confirm the anchor count and the "+ 5" term match
            # this architecture.
            num_anchors = 8753
            assert self.ssd_model.layers[-1].output_shape[-1] == \
                num_anchors/len(self.ssd_model.output) * (self.num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Evenly spaced hues, converted to 0-255 RGB tuples.
        class_count = len(self.class_names)
        self.colors = []
        for x in range(class_count):
            rgb = colorsys.hsv_to_rgb(x / class_count, 1., 1.)
            self.colors.append(
                (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect objects in ``image``, draw them and record them.

        ``self.predict_all`` is reset on every call and receives one
        ``"<class> <score> <left> <top> <right> <bottom>"`` string per kept
        detection; ``self.result_`` holds the most recent of those strings.

        Args:
            image: Image accepted by ``letterbox_image`` and
                ``ImageDraw.Draw`` (presumably a PIL image).

        Returns:
            The same ``image`` object, drawn on in place when there are
            detections.
        """
        image_shape = np.array(np.shape(image)[0:2])
        # Follow the configured input size instead of a hard-coded 300 so
        # that a model_image_size override stays consistent.
        net_size = (self.model_image_size[0], self.model_image_size[1])
        crop_img, _, _ = letterbox_image(image, net_size)
        photo = np.array(crop_img, dtype=np.float64)
        self.predict_all = []

        # Preprocess / normalise the network input.
        photo = tf.keras.applications.imagenet_utils.preprocess_input(
            np.reshape(photo, [1, net_size[0], net_size[1], 3]))
        preds = self.ssd_model.predict(photo)

        # Decode the raw predictions.
        results = self.bbox_util.detection_out(preds)
        if len(results[0]) <= 0:
            return image

        # Keep only rows whose score reaches the confidence setting.
        detections = results[0]
        det_label = detections[:, 0]
        det_conf = detections[:, 1]
        keep = det_conf >= self.confidence
        top_conf = det_conf[keep]
        top_label_indices = det_label[keep].tolist()
        top_xmin = np.expand_dims(detections[keep, 2], -1)
        top_ymin = np.expand_dims(detections[keep, 3], -1)
        top_xmax = np.expand_dims(detections[keep, 4], -1)
        top_ymax = np.expand_dims(detections[keep, 5], -1)

        boxes = ssd_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                  np.array(net_size), image_shape)

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
        # At least one outline pass, otherwise small images get no box.
        thickness = max(
            (np.shape(image)[0] + np.shape(image)[1]) // net_size[0], 1)

        for i, c in enumerate(top_label_indices):
            # Label 0 is skipped: class_names is indexed with label - 1.
            predicted_class = self.class_names[int(c) - 1]
            score = top_conf[i]

            # Grow the box by 5 px per side, then clamp to the image.
            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

            # Record the detection; spaces are removed from the class name.
            self.result_ = '{} {} {} {} {} {}'.format(
                "".join(predicted_class.split(" ")), score, left, top,
                right, bottom)
            self.predict_all.append(self.result_)

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            # Console output stays UTF-8 bytes, as before.
            print(label.encode('utf-8'))

            # Put the label above the box when it fits, else just inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Separate loop variable so the detection index is not clobbered.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=255)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=255)
            draw.text(text_origin, label, fill=0, font=font)
            del draw
        return image
class FRCNN(object):
    """Faster R-CNN predictor with the ROI decoding done in this class.

    Settings come from ``_defaults`` and can be overridden per instance
    through keyword arguments to the constructor.
    """

    _defaults = {
        "model_path": 'model_data/voc_weights.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or a message string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise Faster R-CNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply defaults and overrides, then load classes and the models.

        Args:
            **kwargs: Attribute overrides applied on top of ``_defaults``.
        """
        self.__dict__.update(self._defaults)
        # Overrides used to be accepted and then silently dropped.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.config = Config()
        self.generate()
        self.bbox_util = BBoxUtility()

    #---------------------------------------------------#
    #   Read the class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    #---------------------------------------------------#
    #   Load the models
    #---------------------------------------------------#
    def generate(self):
        """Build the RPN and classifier models, load weights, pick colours.

        Raises:
            AssertionError: ``model_path`` does not end with ``.h5``.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # One extra class on top of the names read from classes_path.
        self.num_classes = len(self.class_names) + 1

        self.model_rpn, self.model_classifier = frcnn.get_predict_model(
            self.config, self.num_classes)
        # Load from the expanded path that was just validated; the raw
        # attribute may still contain "~".
        self.model_rpn.load_weights(model_path, by_name=True)
        self.model_classifier.load_weights(model_path,
                                           by_name=True,
                                           skip_mismatch=True)
        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Evenly spaced hues, converted to 0-255 RGB tuples.
        class_count = len(self.class_names)
        self.colors = []
        for x in range(class_count):
            rgb = colorsys.hsv_to_rgb(x / class_count, 1., 1.)
            self.colors.append(
                (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)))

    def get_img_output_length(self, width, height):
        """Return ``(width, height)`` after four stride-2 reductions.

        Each step applies ``(n + 2 * pad - kernel) // 2 + 1`` with
        kernel sizes 7, 3, 1, 1 and paddings 3, 1, 0, 0.
        """
        def get_output_length(input_length):
            filter_sizes = [7, 3, 1, 1]
            padding = [3, 1, 0, 0]
            stride = 2
            for kernel, pad in zip(filter_sizes, padding):
                input_length = (input_length + 2 * pad - kernel) // stride + 1
            return input_length

        return get_output_length(width), get_output_length(height)

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect objects in ``image`` and return an annotated deep copy.

        Args:
            image: Image offering ``resize``, presumably a PIL image.

        Returns:
            A deep copy of the input, with boxes and labels drawn on it
            when there are detections.
        """
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)

        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float64)

        # Preprocess / normalise, then run the region proposal network.
        photo = preprocess_input(np.expand_dims(photo, 0))
        preds = self.model_rpn.predict(photo)

        # Decode the RPN output against the anchors.
        anchors = get_anchors(self.get_img_output_length(width, height),
                              width, height)
        rpn_results = self.bbox_util.detection_out(preds, anchors, 1,
                                                   confidence_threshold=0)
        stride = self.config.rpn_stride
        num_rois = self.config.num_rois

        # Convert the proposals to (x, y, w, h) in feature-map cells.
        R = rpn_results[0][:, 2:]
        R[:, 0] = np.array(np.round(R[:, 0] * width / stride), dtype=np.int32)
        R[:, 1] = np.array(np.round(R[:, 1] * height / stride), dtype=np.int32)
        R[:, 2] = np.array(np.round(R[:, 2] * width / stride), dtype=np.int32)
        R[:, 3] = np.array(np.round(R[:, 3] * height / stride), dtype=np.int32)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]
        base_layer = preds[2]

        # Drop proposals narrower or shorter than one cell.
        delete_line = [i for i, r in enumerate(R) if r[2] < 1 or r[3] < 1]
        R = np.delete(R, delete_line, axis=0)

        bboxes = []
        probs = []
        labels = []
        last_batch = R.shape[0] // num_rois
        for jk in range(last_batch + 1):
            ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == last_batch:
                # Pad the final partial batch by repeating its first ROI.
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier.predict(
                [base_layer, ROIs])

            for ii in range(P_cls.shape[1]):
                # The last class column is excluded from the decision.
                class_scores = P_cls[0, ii, :-1]
                best_score = np.max(class_scores)
                if best_score < self.confidence:
                    continue
                cls_num = np.argmax(class_scores)

                (x, y, w, h) = ROIs[0, ii, :]
                (tx, ty, tw, th) = P_regr[0, ii,
                                          4 * cls_num:4 * (cls_num + 1)]
                tx /= self.config.classifier_regr_std[0]
                ty /= self.config.classifier_regr_std[1]
                tw /= self.config.classifier_regr_std[2]
                th /= self.config.classifier_regr_std[3]

                # Apply the regression to the ROI centre and size.
                cx = x + w / 2.
                cy = y + h / 2.
                cx1 = tx * w + cx
                cy1 = ty * h + cy
                w1 = math.exp(tw) * w
                h1 = math.exp(th) * h

                x1 = int(round(cx1 - w1 / 2.))
                y1 = int(round(cy1 - h1 / 2.))
                x2 = int(round(cx1 + w1 / 2))
                y2 = int(round(cy1 + h1 / 2))

                bboxes.append([x1, y1, x2, y2])
                probs.append(best_score)
                labels.append(cls_num)

        if len(bboxes) == 0:
            return old_image

        # Back to fractions of the resized image, then NMS.
        labels = np.array(labels)
        probs = np.array(probs)
        boxes = np.array(bboxes, dtype=np.float32)
        boxes[:, 0] = boxes[:, 0] * stride / width
        boxes[:, 1] = boxes[:, 1] * stride / height
        boxes[:, 2] = boxes[:, 2] * stride / width
        boxes[:, 3] = boxes[:, 3] * stride / height
        results = np.array(
            self.bbox_util.nms_for_out(labels, probs, boxes,
                                       self.num_classes - 1, 0.4))
        # An empty NMS result cannot be column-indexed below.
        if results.size == 0:
            return old_image

        top_label_indices = results[:, 0]
        top_conf = results[:, 1]
        boxes = results[:, 2:]
        boxes[:, 0] = boxes[:, 0] * old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * old_width
        boxes[:, 3] = boxes[:, 3] * old_height

        # Font size is derived from the resized image width, as before.
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2

        image = old_image
        for i, c in enumerate(top_label_indices):
            class_index = int(c)
            predicted_class = self.class_names[class_index]
            score = top_conf[i]

            # Grow the box by 5 px per side, then clamp to the image.
            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            # Console output stays UTF-8 bytes, as before.
            print(label.encode('utf-8'))

            # Put the label above the box when it fits, else just inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Separate loop variable so the detection index is not clobbered.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=self.colors[class_index])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[class_index])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        return image

    def close_session(self):
        """Close the backend session obtained in the constructor."""
        self.sess.close()
class Retinaface(object):
    """RetinaFace predictor returning an annotated image plus detections.

    Settings come from ``_defaults`` and can be overridden per instance
    through keyword arguments to the constructor.
    """

    _defaults = {
        "model_path": 'model_data/retinaface_mobilenet025.h5',
        "backbone": 'mobilenet',
        "confidence": 0.6,
        "nms_iou": 0.4,
        #----------------------------------------------------------------------#
        #   Whether to constrain the input image size.
        #   When enabled, the input is limited to input_shape; otherwise the
        #   original image is used for prediction.
        #   The Keras code has a small bug with the mobilenet backbone: when
        #   the input width/height is not a multiple of 32 the detections are
        #   offset. The resnet50 backbone does not have this problem.
        #   Adjust input_shape to your images, keeping multiples of 32,
        #   e.g. [640, 640, 3].
        #----------------------------------------------------------------------#
        "input_shape": [1280, 1280, 3],
        "letterbox_image": False
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or a message string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply defaults and overrides, build the model and the anchors.

        Args:
            **kwargs: Attribute overrides applied on top of ``_defaults``.
        """
        self.__dict__.update(self._defaults)
        # Overrides used to be accepted and then silently dropped.
        for name, value in kwargs.items():
            setattr(self, name, value)
        # Any backbone other than "mobilenet" falls back to cfg_re50.
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
        self.generate()
        # Anchors for the fixed input_shape, used in letterbox mode.
        self.anchors = Anchors(
            self.cfg,
            image_size=(self.input_shape[0], self.input_shape[1]),
        ).get_anchors()

    def generate(self):
        """Build the RetinaFace model and load weights from ``model_path``.

        Raises:
            AssertionError: ``model_path`` does not end with ``.h5``.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        self.retinaface = RetinaFace(self.cfg, self.backbone)
        # Load from the expanded path that was just validated; the raw
        # attribute may still contain "~".
        self.retinaface.load_weights(model_path, by_name=True)
        print('{} model, anchors, and classes loaded.'.format(model_path))

    def detect_image(self, image):
        """Detect faces in ``image``.

        Args:
            image: Three-dimensional image array offering ``copy()`` and
                usable with the OpenCV drawing calls.

        Returns:
            A tuple ``(annotated, detections)``. ``annotated`` is a copy of
            the input with boxes, scores and landmarks drawn on it.
            ``detections`` is a list of dicts with keys ``'box'``
            (four ints), ``'confidence'`` and ``'landmarks'`` (five
            ``(x, y)`` int tuples); it is empty when nothing is found.
        """
        old_image = image.copy()
        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        # Multipliers applied to the box corners (4 values) and to the
        # five landmark points (10 values).
        scale = [im_width, im_height] * 2
        scale_for_landmarks = [im_width, im_height] * 5

        if self.letterbox_image:
            image = letterbox_image(
                image, [self.input_shape[1], self.input_shape[0]])
            anchors = self.anchors
        else:
            # Per-image anchors stay local so the input_shape anchors kept
            # on the instance are not overwritten.
            anchors = Anchors(self.cfg,
                              image_size=(im_height, im_width)).get_anchors()

        photo = np.expand_dims(preprocess_input(image), 0)
        preds = self.retinaface.predict(photo)
        results = self.bbox_util.detection_out(
            preds, anchors, confidence_threshold=self.confidence)
        if len(results) <= 0:
            return old_image, []

        results = np.array(results)
        if self.letterbox_image:
            results = retinaface_correct_boxes(
                results,
                np.array([self.input_shape[0], self.input_shape[1]]),
                np.array([im_height, im_width]))

        # Row layout used below: 0-3 box, 4 score, 5-14 landmarks.
        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        landmark_colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255),
                           (0, 255, 0), (255, 0, 0)]
        ans = []
        for b in results:
            # Capture the score before the row is truncated to integers.
            confidence = b[4].astype(float)
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))

            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]),
                          (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
            print(b[0], b[1], b[2], b[3], b[4])

            landmarks = [(b[5 + 2 * k], b[6 + 2 * k]) for k in range(5)]
            for point, color in zip(landmarks, landmark_colors):
                cv2.circle(old_image, point, 1, color, 4)

            ans.append({
                'box': [b[0], b[1], b[2], b[3]],
                'confidence': confidence,
                'landmarks': landmarks,
            })
        return old_image, ans
class detector(object):
    """SSD300 wrapper: loads weights, runs inference and draws detections."""

    def __init__(self, weight_path=None):
        """Read class list and input size from ``config`` and load weights."""
        self.classes = config.CLASSES
        self.input_shape = config.IMAGE_SIZE
        self._load_weigth(weight_path=weight_path)
        self.bbox_util = BBoxUtility(len(self.classes))

    def _load_weigth(self, weight_path=None):
        """Build the SSD300 model, load ``weight_path`` and pick class colours."""
        weight_path = os.path.expanduser(weight_path)
        assert weight_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        # Load the weight file.
        self.model = SSD300(config.IMAGE_SIZE, len(self.classes), anchors=config.ANCHORS_SIZE)
        self.model.load_weights(weight_path)

        # One evenly spaced hue per class, converted to 0-255 RGB tuples.
        hsv_tuples = [(x / len(self.classes), 1., 1.) for x in range(len(self.classes))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))

    @tf.function
    def get_pred(self, photo):
        """Run the model in inference mode on an already preprocessed batch."""
        preds = self.model(photo, training=False)
        return preds

    def detect_image(self, image):
        """Detect objects in ``image``, draw labelled boxes on it and return it.

        The image is returned unchanged when nothing is detected.
        """
        image_shape = np.array(np.shape(image)[0:2])
        crop_image, x_offset, y_offset = letterbox_image(image, (self.input_shape[0], self.input_shape[1]))
        photo = np.array(crop_image, dtype=np.float64)

        # Normalization
        photo = preprocess_input(np.reshape(photo, [1, self.input_shape[0], self.input_shape[1], 3]))
        preds = self.get_pred(photo).numpy()

        # Decode
        results = self.bbox_util.detection_out(preds, confidence_threshold=config.CONFIDENCE)
        if len(results[0]) <= 0:
            return image

        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin, det_ymin, det_xmax, det_ymax = results[0][:, 2], results[0][:, 3], results[0][:, 4], results[0][:, 5]

        top_indices = [i for i, conf in enumerate(det_conf) if conf >= config.CONFIDENCE]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], axis=-1)
        top_ymin = np.expand_dims(det_ymin[top_indices], axis=-1)
        top_xmax = np.expand_dims(det_xmax[top_indices], axis=-1)
        top_ymax = np.expand_dims(det_ymax[top_indices], axis=-1)

        boxes = ssd_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                  np.array((self.input_shape[0], self.input_shape[1])), image_shape)

        font = ImageFont.truetype(font='simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
        # At least one pixel, otherwise small images get no outline at all.
        thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0], 1)

        for i, c in enumerate(top_label_indices):
            # Labels are 1-based here, hence the -1 into classes/colours.
            predicted_class = self.classes[int(c) - 1]
            score = top_conf[i]

            ymin, xmin, ymax, xmax = boxes[i]
            # Pad the box by 5 px on every side: the minimum corner moves
            # out by -5, the maximum corner by +5.
            ymin = ymin - 5
            xmin = xmin - 5
            ymax = ymax + 5
            xmax = xmax + 5

            ymin = max(0, np.floor(ymin + 0.5).astype('int32'))
            xmin = max(0, np.floor(xmin + 0.5).astype('int32'))
            ymax = min(np.shape(image)[0], np.floor(ymax + 0.5).astype('int32'))
            xmax = min(np.shape(image)[1], np.floor(xmax + 0.5).astype('int32'))

            # draw Bounding box
            label = "{}:{:.2f}".format(predicted_class, score)
            print(label)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')

            # Put the label above the box when there is room, else inside it.
            if ymin - label_size[1] >= 0:
                text_origin = np.array((xmin, ymin - label_size[1]))
            else:
                text_origin = np.array((xmin, ymin + 1))

            # Nested 1-px rectangles emulate a thick outline.
            for offset in range(thickness):
                draw.rectangle(
                    [xmin + offset, ymin + offset, xmax - offset, ymax - offset],
                    outline=self.colors[int(c) - 1])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[int(c) - 1])
            draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
            del draw
        return image
class ssdT(object):
    """SSD-based hand detector with image, precision and video pipelines.

    NOTE(review): indentation was lost in the copy this block was rebuilt
    from; nesting inside ``run`` and ``_body_part_boxes`` was reconstructed
    from data flow and should be confirmed against the original file.
    """

    def __init__(self, model, classes, input_shape):
        self.classes = classes
        self.num_class = len(classes) + 1
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_class)

    def image_test(self, path, inputs=None, oimg=None):
        """Detect on one image and return ``[xmin, ymin, xmax, ymax, score]`` rows.

        When ``path`` is given the image is read and preprocessed here;
        otherwise ``inputs`` (network input) and ``oimg`` (image whose size is
        used to scale the boxes) must be supplied. Rows scoring below 0.4 are
        dropped. Returns ``None`` when the decoder yields no result list.
        """
        bbox_util = BBoxUtility(2)
        if path is not None:
            img = cv2.imread(path)
            images = img.copy()
            img = cv2.resize(img, (self.input_shape[0], self.input_shape[1]))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            inputs = image.img_to_array(img)
            inputs = preprocess_input(np.array([inputs]))
        else:
            images = oimg.copy()
        preds = self.model.predict(inputs, batch_size=1, verbose=1)
        results = bbox_util.detection_out(preds)
        print(results)
        if len(results) > 0:
            final = []
            for each in results[0]:
                if each[1] < 0.4:
                    continue
                # Box coordinates are fractions of the image size.
                xmin = int(each[2] * np.shape(images)[1])
                ymin = int(each[3] * np.shape(images)[0])
                xmax = int(each[4] * np.shape(images)[1])
                ymax = int(each[5] * np.shape(images)[0])
                final.append([xmin, ymin, xmax, ymax, each[1]])
            return final
        return None

    def precision(self, test_path):
        """Print matched-detections / ground-truth-boxes over a test list."""
        data = testdata_load(test_path)
        gnum = 0
        rnum = 0
        for eachline in data:
            res = self.image_test(eachline[0])
            # Field 0 is the image path; the rest is read in groups of five,
            # skipping every index that is a multiple of five.
            gtlist = []
            temp = []
            for i in range(len(eachline)):
                if i % 5 == 0:
                    continue
                if i % 5 == 1 and i // 5 > 0:
                    gtlist.append(temp)
                    temp = []
                temp.append(int(eachline[i]))
            gtlist.append(temp)
            print(res)
            tnum, pgnum = self.cal_iou(res, gtlist)
            gnum += pgnum
            rnum += tnum
        print("precision:", float(rnum / gnum))

    def cal_iou(self, res, gt):
        """Count detection/ground-truth pairs whose IoU exceeds 0.7.

        ``res`` is a list of ``[xmin, ymin, xmax, ymax, ...]`` rows or ``None``;
        ``gt`` is a list of ``[xmin, ymin, xmax, ymax]`` boxes. Returns
        ``(matches, len(gt))``.
        """
        if res is None:
            return 0, len(gt)
        tnum = 0
        for each in gt:
            gxmin, gymin, gxmax, gymax = each[0], each[1], each[2], each[3]
            agt = (gxmax - gxmin) * (gymax - gymin)
            for one in res:
                # Clamp each extent at zero: for disjoint boxes both extents
                # can be negative and their product would look like overlap.
                inter_w = max(0, min(gxmax, one[2]) - max(gxmin, one[0]))
                inter_h = max(0, min(gymax, one[3]) - max(gymin, one[1]))
                overlap = inter_w * inter_h
                ares = (one[2] - one[0]) * (one[3] - one[1])
                wholea = agt + ares - overlap
                if wholea <= 0:
                    # Degenerate (zero-area) boxes cannot match.
                    continue
                if overlap / wholea > 0.7:
                    tnum += 1
        return tnum, len(gt)

    @staticmethod
    def _suppress_isolated(labels):
        """In place: replace a label that differs from both neighbours."""
        for i in range(1, len(labels) - 1):
            if labels[i] != labels[i - 1] and labels[i] != labels[i + 1]:
                labels[i] = labels[i - 1]

    @staticmethod
    def _majority_smooth(labels):
        """In place: set each label to the most common one in a +/-2 window."""
        for i in range(len(labels)):
            window = labels[max(0, i - 2):i + 3]
            labels[i] = Counter(window).most_common(1)[0][0]

    @staticmethod
    def _load_people(openposeJson, frame_index):
        """Return the ``"people"`` list of the ``frame_index``-th file in the folder."""
        video_file_name = os.listdir(openposeJson)
        with open(openposeJson + video_file_name[frame_index], "r") as handle:
            return json.load(handle)["people"]

    @staticmethod
    def _span(detail, xa, xb, ya, yb):
        """Return the int box spanned by two x entries and two y entries."""
        return [
            int(np.minimum(detail[xa], detail[xb])),
            int(np.minimum(detail[ya], detail[yb])),
            int(np.maximum(detail[xa], detail[xb])),
            int(np.maximum(detail[ya], detail[yb])),
        ]

    @classmethod
    def _body_part_boxes(cls, detail, frame_height):
        """Build named body-part boxes from a flat keypoint list.

        A box stays ``[]`` when the entries it is guarded by are zero.
        NOTE(review): arm boxes are nested under the both-shoulders check and
        lower limbs under their upper limb; this nesting was inferred from the
        indices used, so confirm it.
        """
        partsplit = {
            "main body": [], "left hand above": [], "left hand below": [],
            "right hand above": [], "right hand below": [],
            "left leg above": [], "left leg below": [],
            "right leg above": [], "right leg below": [], "head": []
        }
        if detail[51] != 0 and detail[54] != 0 and detail[4] != 0:
            partsplit["head"] = [
                int(np.minimum(detail[54], detail[51])) - 5,
                int(detail[52]) - 50,
                int(np.maximum(detail[51], detail[54])) + 5,
                int(detail[4]),
            ]
        if detail[6] != 0 and detail[15] != 0:
            # Bottom edge falls back to the frame bottom when entry 24 is zero.
            ymaxpos = int(detail[25]) if detail[24] != 0 else frame_height
            partsplit["main body"] = [
                int(np.minimum(detail[15], detail[6])),
                int(np.minimum(detail[7], detail[16])),
                int(np.maximum(detail[6], detail[15])),
                ymaxpos,
            ]
            if detail[9] != 0:
                partsplit["right hand above"] = cls._span(detail, 6, 9, 7, 10)
                if detail[12] != 0:
                    partsplit["right hand below"] = cls._span(detail, 12, 9, 13, 10)
            if detail[18] != 0:
                partsplit["left hand above"] = cls._span(detail, 15, 18, 16, 19)
                if detail[21] != 0:
                    partsplit["left hand below"] = cls._span(detail, 21, 18, 22, 19)
        if detail[27] != 0 and detail[30] != 0:
            partsplit["right leg above"] = cls._span(detail, 24, 30, 28, 31)
            if detail[33] != 0:
                partsplit["right leg below"] = cls._span(detail, 30, 33, 31, 34)
        if detail[36] != 0 and detail[39] != 0:
            partsplit["left leg above"] = cls._span(detail, 24, 39, 37, 40)
            if detail[42] != 0:
                partsplit["left leg below"] = cls._span(detail, 39, 42, 40, 43)
        return partsplit

    @staticmethod
    def _plot_timeline(video_path, timeline, labelline, handStatus):
        """Plot both label series over time and save next to the video as .jpg."""
        plt.figure(figsize=(100, 20))
        plt.plot(timeline, labelline, label='hand exist', color='r')
        plt.plot(timeline, handStatus, label="hand status", color='b')
        finaltime = int(float(timeline[-1])) + 2
        plt.hlines("hand exist", 0, finaltime, color="green", linestyles="dashed")
        plt.hlines("hand not exist", 0, finaltime, color="blue", linestyles="dashed")
        plt.hlines("touch exist", 0, finaltime, color="red", linestyles="dashed")
        plt.hlines("no touch exist", 0, finaltime, color="green", linestyles="dashed")
        plt.text(finaltime, "hand exist", "hand detected at each time", fontsize=10)
        plt.text(finaltime, "hand not exist", "hand not detected at each time", fontsize=10)
        plt.text(finaltime, "touch exist", "hand detected and touch valid at each time", fontsize=10)
        plt.text(finaltime, "no touch exist",
                 "no hand or no touch valid though hand detected at each time", fontsize=10)
        plt.xlabel("time(ms)/per frame", fontsize=20)
        plt.ylabel(
            "hand relative label(blue is touch validation label, red is hand detection label)",
            fontsize=20)
        plt.legend()
        plt.savefig(video_path[:-4] + ".jpg")

    def run(self, model_path, video_path=None, openposeJson=None, out_path=None,
            start_frame=0, conf_threshold=0.5, model2=None, model3=None):
        """Process a video frame by frame, labelling hands and touches.

        For every frame, hands are detected (on wrist crops when
        ``openposeJson`` is given, else on the whole frame; with the
        ``combine`` ensemble when both ``model2`` and ``model3`` are given),
        fingertips are classified, and each hand is marked "touch" or
        "non - touch". At end of video a timeline plot (``.jpg``) and a
        per-frame record (``.json``, appended) are written next to
        ``video_path``. Annotated frames are shown, and written to
        ``out_path`` when it is set. Returns ``None``.
        """
        openpose_part = [
            "Nose", "Neck", "RShoulder", "RElbow", "RWrist", "LShoulder",
            "LElbow", "LWrist", "MidHip", "RHip", "RKnee", "RAnkle", "LHip",
            "LKnee", "LAnkle", "REye", "LEye", "REar", "LEar", "LBigToe",
            "LSmallToe", "LHeel", "RBigToe", "RSmallToe", "RHeel", "Background"
        ]
        fingertips = Fingertips(weights='model_data/finmodel.h5')
        if video_path is None:
            return None
        video = cv2.VideoCapture(video_path)
        timeline = []
        labelline = []
        handStatus = []
        out = None  # stays None when no output file was requested
        if out_path:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(out_path, fourcc, 10.0,
                                  (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
                                   int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))),
                                  isColor=True)
        # Result is not used below; the call is kept so a bad model_path
        # still fails here as before.
        vggmodel = load_model(model_path)
        if start_frame > 0:
            # Same constant family as the CAP_PROP_POS_MSEC read below.
            video.set(cv2.CAP_PROP_POS_MSEC, start_frame)
        prev_time = timer()
        num_frame = 0
        video_info = {}
        frame_info = []
        lastTime = 0
        input_size = (self.input_shape[0], self.input_shape[1])
        single_model = model2 is None or model3 is None

        while True:
            info, vimage = video.read()
            milliseconds = video.get(cv2.CAP_PROP_POS_MSEC)
            seconds = milliseconds / 1000
            video_info[str(seconds)] = []
            if not info:
                # End of stream: smooth both label series, then export.
                self._suppress_isolated(labelline)
                self._suppress_isolated(handStatus)
                self._majority_smooth(labelline)
                self._majority_smooth(handStatus)
                if timeline:
                    self._plot_timeline(video_path, timeline, labelline, handStatus)
                video.release()
                if out is not None:
                    out.release()
                cv2.destroyAllWindows()
                with open(video_path[:-4] + ".json", "a") as outfile:
                    json.dump(video_info, outfile, ensure_ascii=False)
                    outfile.write('\n')
                print("Over")
                return

            timeline.append(round(milliseconds, 2))
            resized = cv2.resize(vimage, input_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
            inputs = image.img_to_array(rgb)
            input_image = preprocess_input(np.array([inputs]))
            frame_h, frame_w = np.shape(vimage)[0], np.shape(vimage)[1]
            body_info = self._load_people(openposeJson, num_frame) if openposeJson else []

            # Pass 1: detect on a crop around each wrist (entries 12 and 21).
            res = [[]]
            for person in body_info:
                keypoints = person["pose_keypoints_2d"]
                for x in (12, 21):
                    if x >= len(keypoints):
                        continue
                    # Crop size derives from the gap between entries 4 and 25.
                    if int(keypoints[4]) != 0:
                        if int(keypoints[25]) != 0:
                            distance = int((keypoints[25] - keypoints[4]) / 2)
                        else:
                            distance = int((frame_h - keypoints[4]) / 2)
                    else:
                        distance = 100
                    half = int(distance / 2)
                    xpos = int(keypoints[x])
                    ypos = int(keypoints[x + 1])
                    elxpos = int(keypoints[x - 3])
                    elypos = int(keypoints[x - 2])
                    if xpos == 0 and ypos == 0:
                        continue
                    # Extend the crop further on the side away from the elbow.
                    if elxpos >= xpos:
                        xmin = max(xpos - distance, 0)
                        xmax = min(xpos + half, frame_w)
                    else:
                        xmin = max(xpos - half, 0)
                        xmax = min(xpos + distance, frame_w)
                    if elypos >= ypos:
                        ymin = max(ypos - distance, 0)
                        ymax = min(ypos + half, frame_h)
                    else:
                        ymin = max(ypos - half, 0)
                        ymax = min(ypos + distance, frame_h)
                    print("distance is", distance, "box is", [xmin, ymin, xmax, ymax])
                    crop_image = vimage[ymin:ymax, xmin:xmax]
                    rgb_crop = cv2.cvtColor(cv2.resize(crop_image, input_size), cv2.COLOR_BGR2RGB)
                    input_crop = preprocess_input(np.array([image.img_to_array(rgb_crop)]))
                    if single_model:
                        if len(res) > 0:
                            res[0].append(
                                self.bbox_util.detection_out(
                                    self.model.predict(input_crop))[0][0])
                    else:
                        # Evaluate the ensemble once and reuse the result.
                        crop_dets = combine(self, model2, model3, None, input_crop, crop_image)
                        for each in crop_dets:
                            if each[1] < conf_threshold:
                                continue
                            # Map crop coordinates back to the full frame.
                            if each[2] <= 1 and each[3] <= 1 and each[4] <= 1 and each[5] <= 1:
                                each[2] = int(each[2] * np.shape(crop_image)[1]) + xmin
                                each[3] = int(each[3] * np.shape(crop_image)[0]) + ymin
                                each[4] = int(each[4] * np.shape(crop_image)[1]) + xmin
                                each[5] = int(each[5] * np.shape(crop_image)[0]) + ymin
                            else:
                                each[2] = int(each[2]) + xmin
                                each[3] = int(each[3]) + ymin
                                each[4] = int(each[4]) + xmin
                                each[5] = int(each[5]) + ymin
                            res[0].append(each)
                            print("res is", res)

            # Pass 2: no crop detections, so detect on the whole frame.
            if len(res[0]) == 0:
                if single_model:
                    pred = self.model.predict(input_image)
                    res = self.bbox_util.detection_out(pred)
                else:
                    # ssd ensemble learning
                    res = [combine(self, model2, model3, None, input_image, vimage)]

            if len(res) > 0 and len(res[0]) > 0:
                temp = {}
                temp["hand"] = "exist"
                temp["hand status"] = []
                temp["body part"] = []
                temp["hand position"] = []
                for each in res[0]:
                    if each[1] < conf_threshold:
                        continue
                    # Coordinates all <= 1 are treated as fractions of the frame.
                    if each[2] <= 1 and each[3] <= 1 and each[4] <= 1 and each[5] <= 1:
                        xmin = int(each[2] * frame_w)
                        ymin = int(each[3] * frame_h)
                        xmax = int(each[4] * frame_w)
                        ymax = int(each[5] * frame_h)
                    else:
                        xmin = int(each[2])
                        ymin = int(each[3])
                        xmax = int(each[4])
                        ymax = int(each[5])
                    test_img = vimage[ymin:ymax, xmin:xmax]
                    height, width, _ = test_img.shape
                    if height < 5 or width < 5:
                        finum = 0
                        continue
                    temp["hand position"].append([xmin, ymin, xmax, ymax])

                    # gesture classification and fingertips regression
                    prob, pos = fingertips.classify(image=test_img)
                    pos = np.mean(pos, 0)
                    # post-processing
                    prob = np.asarray([(p >= 0.5) * 1.0 for p in prob])
                    for i in range(0, len(pos), 2):
                        pos[i] = pos[i] * width + xmin
                        pos[i + 1] = pos[i + 1] * height + ymin

                    # drawing
                    index = 0
                    color = [(15, 15, 240), (15, 240, 155), (240, 155, 15),
                             (240, 15, 155), (240, 15, 240)]
                    finum = 0
                    for c, p in enumerate(prob):
                        if p > 0.5:
                            finum += 1
                            vimage = cv2.circle(
                                vimage, (int(pos[index]), int(pos[index + 1])),
                                radius=12, color=color[c], thickness=-2)
                        index = index + 2

                    body_in = []
                    cv2.rectangle(vimage, (xmin, ymin), (xmax, ymax),
                                  color=(255, 0, 0), thickness=2)
                    cv2.putText(vimage, "hand", (xmin, ymin - 3),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 255, 0), 1)

                    # A hand is flagged when it overlaps a box recorded at the
                    # previous frame time by > 0.6 of its own area and at
                    # least one fingertip was found.
                    flag = 0
                    pastTrue = 0
                    for past in reversed(frame_info):
                        if past[0] == lastTime:
                            vect2 = past[3:]
                            vwidth = min(xmax, vect2[2]) - max(xmin, vect2[0]) + 1
                            vheight = min(ymax, vect2[3]) - max(ymin, vect2[1]) + 1
                            if vwidth < 0 or vheight < 0:
                                continue
                            nsq = (ymax - ymin + 1) * (xmax - xmin + 1)
                            print("overlap fration:", vwidth * vheight / nsq)
                            if vwidth * vheight / nsq > 0.6:
                                pastTrue += 1
                        elif past[0] < lastTime:
                            break
                    if pastTrue > 0 and finum >= 1:
                        flag += 1

                    for person in body_info:
                        detail = person["pose_keypoints_2d"]
                        partsplit = self._body_part_boxes(detail, frame_h)
                        # Keypoints (other than entries 12 and 21) inside the hand box.
                        for x in range(0, len(detail), 3):
                            if x == 12 or x == 21:
                                continue
                            xpos = int(detail[x])
                            ypos = int(detail[x + 1])
                            if (xpos >= xmin and xpos <= xmax) and (ypos >= ymin and ypos <= ymax):
                                body_in.append(openpose_part[x // 3])
                        # Body-part boxes covering > 20% of the hand box.
                        for keyname, btemp in partsplit.items():
                            if btemp == []:
                                continue
                            owidth = np.minimum(btemp[2], xmax) - np.maximum(xmin, btemp[0]) + 1
                            oheight = np.minimum(btemp[3], ymax) - np.maximum(ymin, btemp[1]) + 1
                            wholehand = (ymax - ymin + 1) * (xmax - xmin + 1)
                            cv2.rectangle(vimage, (btemp[0], btemp[1]),
                                          (btemp[2], btemp[3]), (0, 0, 255), 1)
                            cv2.putText(
                                vimage, keyname,
                                (int((btemp[2] + btemp[0]) / 2) - 1, btemp[1] - 3),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 255, 255), 1)
                            if owidth < 0 or oheight < 0:
                                continue
                            oarea = owidth * oheight
                            print("keyname is", keyname)
                            print("flag is", flag)
                            print("btemp is", btemp, "hand is", [xmin, ymin, xmax, ymax])
                            print("fraction is:", oarea / wholehand)
                            if oarea / wholehand > 0.2:
                                body_in.append(keyname)

                    # Any two confident detections with IoU > 0.6 add "hand".
                    for i in range(len(res[0])):
                        if res[0][i][1] < conf_threshold:
                            continue
                        for j in range(i + 1, len(res[0])):
                            if res[0][j][1] < conf_threshold:
                                continue
                            temp1 = res[0][i]
                            temp2 = res[0][j]
                            pair_w = np.min([int(temp1[4]), int(temp2[4])]) - np.max(
                                [int(temp1[2]), int(temp2[2])]) + 1
                            pair_h = np.min([int(temp1[5]), int(temp2[5])]) - np.max(
                                [int(temp1[3]), int(temp2[3])]) + 1
                            if pair_w < 0 or pair_h < 0:
                                continue
                            area1 = (temp1[5] - temp1[3] + 1) * (temp1[4] - temp1[2] + 1)
                            area2 = (temp2[5] - temp2[3] + 1) * (temp2[4] - temp2[2] + 1)
                            overlap = pair_w * pair_h
                            ratio = overlap / (area1 + area2 - overlap)
                            if ratio > 0.6:
                                body_in.append("hand")

                    print("body part is", body_in)
                    frame_info.append([milliseconds, flag, finum, xmin, ymin, xmax, ymax])
                    if flag > 0 and len(body_in) != 0:
                        cv2.putText(vimage, "touch", (xmax, ymin - 3),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 255), 1)
                        temp["hand status"].append("touch")
                    else:
                        cv2.putText(vimage, "non - touch", (xmax, ymin - 3),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 255), 1)
                        temp["hand status"].append("non - touch")
                    if temp["hand status"][-1] == "touch":
                        temp["body part"].append(body_in)
                    else:
                        temp["body part"].append([])

                if len(temp["hand status"]) == 0:
                    video_info[str(seconds)].append("hand not exist")
                    labelline.append("hand not exist")
                else:
                    video_info[str(seconds)].append(temp)
                    labelline.append("hand exist")
                # Appended for every frame so handStatus stays as long as timeline.
                if "touch" in temp["hand status"]:
                    handStatus.append("touch exist")
                else:
                    handStatus.append("no touch exist")
            else:
                video_info[str(seconds)].append("hand not exist")
                labelline.append("hand not exist")
                handStatus.append("no touch exist")

            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            curr_fps = int(1 / exec_time)
            num_frame += 1
            lastTime = milliseconds
            fps = "FPS:" + str(curr_fps)
            cv2.rectangle(vimage, (0, 0), (50, 17), (255, 255, 255), -1)
            cv2.putText(vimage, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)
            cv2.imshow("SSD result", vimage)
            if out is not None:
                out.write(vimage)
            cv2.waitKey(1)
def __init__(self, model, classes, input_shape):
    """Store the model, class list and input shape, and build the box decoder.

    ``num_class`` is one more than the number of entries in ``classes``.
    """
    self.classes = classes
    class_count = len(classes)
    self.num_class = class_count + 1
    self.model = model
    self.input_shape = input_shape
    # The decoder is sized by num_class, not by len(classes).
    self.bbox_util = BBoxUtility(self.num_class)
# Folder that receives checkpoints, and the base name for saved weights.
model_save_folder = "./logs/"
h5_file_name = "ssd_vgg_512"
########################################################

# Enable on-demand GPU memory allocation on every visible GPU.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

if not os.path.exists(model_save_folder):
    os.mkdir(model_save_folder)

if __name__ == "__main__":
    priors = get_anchors_512((img_height, img_width))
    bbox_util = BBoxUtility(len(config.CLASSES), priors)

    # NOTE(review): `anchors` and `variances` are not defined in this
    # excerpt -- presumably module-level values defined earlier; confirm.
    model = SSD512((img_height, img_width, img_channels), n_classes=len(config.CLASSES), anchors=anchors, variances=variances)

    # Save weights only, and only when val_loss improves.
    checkpoint = ModelCheckpoint(model_save_folder + "ssd_vgg_epoch{epoch:02d}.h5",
                                 monitor='val_loss', save_weights_only=True, save_best_only=True)
    # Halve the learning rate after 3 epochs without val_loss improvement.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
# 训练自己的数据集时提示维度不匹配正常 # 预测的东西都不一样了自然维度不匹配 #------------------------------------------------------# model_path = "model_data/efficientdet-d0-voc.h5" #------------------------------------------------------# # 创建Efficientdet模型 #------------------------------------------------------# model = Efficientdet(phi, num_classes=num_classes) model.load_weights(model_path, by_name=True, skip_mismatch=True) #-------------------------------# # 获得先验框 #-------------------------------# priors = get_anchors(image_sizes[phi]) bbox_util = BBoxUtility(num_classes, priors) #----------------------------------------------------------------------# # 验证集的划分在train.py代码里面进行 # 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。 # 当前划分方式下,验证集和训练集的比例为1:9 #----------------------------------------------------------------------# val_split = 0.1 with open(annotation_path) as f: lines = f.readlines() np.random.seed(10101) np.random.shuffle(lines) np.random.seed(None) num_val = int(len(lines) * val_split) num_train = len(lines) - num_val
def __init__(self, **kwargs):
    """Initialise from class defaults, load classes and model, build decoder.

    ``**kwargs`` is accepted but not used.
    """
    self.__dict__.update(self._defaults)
    self.class_names = self._get_class()
    # get_session must be *called*: storing the bare function would leave
    # self.sess without any session methods.
    self.sess = tf.compat.v1.keras.backend.get_session()
    self.generate()
    # The decoder is sized by self.num_classes, which must exist by now
    # (presumably set by generate() -- confirm).
    self.bbox_util = BBoxUtility(self.num_classes)
# 视频上为600,600,3,多次训练测试后发现800,800,3更优 #-----------------------------------------------------# input_shape = [800, 800, 3] model_rpn, model_all = get_model(config, NUM_CLASSES) #------------------------------------------------------# # 权值文件请看README,百度网盘下载 # 训练自己的数据集时提示维度不匹配正常 # 预测的东西都不一样了自然维度不匹配 #------------------------------------------------------# base_net_weights = "model_data/voc_weights.h5" model_rpn.load_weights(base_net_weights, by_name=True) model_all.load_weights(base_net_weights, by_name=True) bbox_util = BBoxUtility(overlap_threshold=config.rpn_max_overlap, ignore_threshold=config.rpn_min_overlap, top_k=config.num_RPN_train_pre) #--------------------------------------------# # 训练参数的设置 #--------------------------------------------# callback = tf.summary.create_file_writer("logs") loss_history = LossHistory("logs/") annotation_path = '2007_train.txt' #----------------------------------------------------------------------# # 验证集的划分在train.py代码里面进行 # 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。 # 当前划分方式下,验证集和训练集的比例为1:9 #----------------------------------------------------------------------# val_split = 0.1
# NOTE(review): this raise is the tail of a backbone-selection branch whose
# header lies outside this excerpt; its real indentation cannot be recovered
# from here -- confirm against the full file.
raise ValueError('Unsupported backbone - `{}`, Use mobilenet, resnet50.'.format(backbone))

img_dim = cfg['image_size']

#-------------------------------#
#   Build the model
#-------------------------------#
model = RetinaFace(cfg, backbone=backbone)
model_path = "model_data/retinaface_mobilenet025.h5"
model.load_weights(model_path,by_name=True,skip_mismatch=True)

#-------------------------------#
#   Get the prior boxes (anchors) and the box utility
#-------------------------------#
anchors = Anchors(cfg, image_size=(img_dim, img_dim)).get_anchors()
bbox_util = BBoxUtility(anchors)

# Training parameter settings
logging = TensorBoard(log_dir="logs")
checkpoint = ModelCheckpoint('logs/ep{epoch:03d}-loss{loss:.3f}.h5',
    monitor='loss', save_weights_only=True, save_best_only=False, period=1)
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=2, verbose=1)
early_stopping = EarlyStopping(monitor='loss', min_delta=0, patience=6, verbose=1)

# Freeze the first `freeze_layers` layers.
for i in range(freeze_layers): model.layers[i].trainable = False
print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model.layers)))

#------------------------------------------------------#
#   Backbone feature-extraction features are generic; training with the
#   backbone frozen speeds up training and also keeps the weights from
#   being damaged early in training.
#   Init_Epoch is the starting epoch
class EfficientDet(object):
    """EfficientDet predictor: loads a model and draws detections on images.

    Configuration comes from ``_defaults``; any keyword argument passed to
    the constructor overrides the default of the same name.
    """

    _defaults = {
        "model_path": 'model_data/efficientdet-d0-voc.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "phi": 0,
        "confidence": 0.4,
        "iou": 0.3,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for ``n``, or an 'Unrecognized ...' message string."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise efficientdet
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        # Keyword overrides were previously accepted but silently dropped.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.model_image_size = [
            image_sizes[self.phi], image_sizes[self.phi], 3
        ]
        self.sess = K.get_session()
        self.generate()
        self.bbox_util = BBoxUtility(self.num_classes, nms_thresh=self.iou)
        self.prior = self._get_prior()

    #---------------------------------------------------#
    #   Get all class names
    #---------------------------------------------------#
    def _get_class(self):
        """Read one class name per line from ``classes_path``."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Get the prior boxes
    #---------------------------------------------------#
    def _get_prior(self):
        data = get_anchors(image_sizes[self.phi])
        return data

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        """Build the network, load its weights and pick one colour per class."""
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        #----------------------------------------#
        #   Compute the number of classes
        #----------------------------------------#
        self.num_classes = len(self.class_names)

        #----------------------------------------#
        #   Create the Efficientdet model
        #----------------------------------------#
        self.Efficientdet = Efficientdet(self.phi, self.num_classes)
        # Load from the expanded path so "~/..." model paths work.
        self.Efficientdet.load_weights(model_path)

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # A different colour for each class's boxes.
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    def _prepare_input(self, image):
        """Letterbox ``image`` to the model size and return the network input batch."""
        #---------------------------------------------------------#
        #   Add gray bars to the image for a distortion-free resize
        #---------------------------------------------------------#
        crop_img = letterbox_image(
            image, [self.model_image_size[1], self.model_image_size[0]])
        #-----------------------------------------------------------#
        #   Preprocess / normalise; reshaped to
        #   [1, model_image_size[0], model_image_size[1], model_image_size[2]]
        #-----------------------------------------------------------#
        photo = np.array(crop_img, dtype=np.float32)
        return np.reshape(preprocess_input(photo), [
            1, self.model_image_size[0], self.model_image_size[1],
            self.model_image_size[2]
        ])

    def _predict_boxes(self, photo, image_shape):
        """Predict, decode and un-letterbox.

        Returns ``(label_indices, scores, boxes)`` for detections scoring at
        least ``self.confidence``, or ``None`` when nothing is decoded.
        """
        preds = self.Efficientdet.predict(photo)
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
        results = self.bbox_util.detection_out(
            preds, self.prior, confidence_threshold=self.confidence)
        if len(results[0]) <= 0:
            return None
        results = np.array(results)

        det_label = results[0][:, 5]
        det_conf = results[0][:, 4]
        det_xmin, det_ymin, det_xmax, det_ymax = (
            results[0][:, 0], results[0][:, 1], results[0][:, 2], results[0][:, 3])
        #-----------------------------------------------------------#
        #   Keep the boxes whose score is at least `confidence`
        #-----------------------------------------------------------#
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)
        #-----------------------------------------------------------#
        #   Remove the gray-bar part
        #-----------------------------------------------------------#
        boxes = efficientdet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)
        return top_label_indices, top_conf, boxes

    #---------------------------------------------------#
    #   Detect on an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Draw labelled detection boxes on ``image`` and return it.

        The RGB-converted image is returned undrawn when nothing is detected.
        """
        #---------------------------------------------------------#
        #   Convert to RGB here so grayscale images do not fail
        #   during prediction.
        #---------------------------------------------------------#
        image = image.convert('RGB')
        image_shape = np.array(np.shape(image)[0:2])
        photo = self._prepare_input(image)

        #--------------------------------------#
        #   No objects detected: return the image as is
        #--------------------------------------#
        detections = self._predict_boxes(photo, image_shape)
        if detections is None:
            return image
        top_label_indices, top_conf, boxes = detections

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max((np.shape(image)[0] + np.shape(image)[1]) //
                        self.model_image_size[0], 1)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            # Pad the box by 5 px on every side, then clip to the image.
            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            # Put the label above the box when there is room, else inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-px rectangles emulate a thick outline.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset, right - offset, bottom - offset],
                    outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image

    def get_FPS(self, image, test_interval):
        """Return the mean seconds per predict+decode pass over ``test_interval`` runs.

        One untimed warm-up pass runs first. Preprocessing and drawing are
        not part of the timed work.
        """
        image_shape = np.array(np.shape(image)[0:2])
        photo = self._prepare_input(image)

        # Warm-up pass, excluded from the measurement.
        self._predict_boxes(photo, image_shape)

        t1 = time.time()
        for _ in range(test_interval):
            self._predict_boxes(photo, image_shape)
        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time

    def close_session(self):
        self.sess.close()
class Retinaface(object):
    """RetinaFace face detector that draws boxes, scores and landmarks.

    Settings come from ``_defaults`` and may be overridden per instance
    through keyword arguments to the constructor.
    """

    _defaults = {
        "model_path": 'model_data/retinaface_mobilenet025.h5',
        "backbone": 'mobilenet',
        "confidence": 0.5,
        "nms_iou": 0.45,
        #----------------------------------------------------------------------#
        #   Whether to constrain the input image size.
        #   When enabled, the input image is limited to input_shape;
        #   otherwise the original image size is used for prediction.
        #   The tf2 code has a small bug with the mobilenet backbone: when the
        #   input width/height is not a multiple of 32 the detections are
        #   offset.  The resnet50 backbone does not have this problem.
        #   Adjust input_shape to your images, keeping it a multiple of 32,
        #   e.g. [640, 640, 3].
        #----------------------------------------------------------------------#
        "input_shape": [1280, 1280, 3],
        "letterbox_image": True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or an error string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Load defaults, apply ``kwargs`` overrides, build model and anchors.

        Args:
            **kwargs: per-instance overrides for entries of ``_defaults``
                (e.g. ``confidence=0.7``).  Previously these were accepted
                but silently ignored.
        """
        self.__dict__.update(self._defaults)
        # Overrides must land before anything below reads the settings.
        for name, value in kwargs.items():
            setattr(self, name, value)

        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
        self.generate()
        # Anchors for the fixed input_shape; detect_image() recomputes them
        # per image when letterboxing is disabled.
        self.anchors = Anchors(self.cfg,
                               image_size=(self.input_shape[0],
                                           self.input_shape[1])).get_anchors()

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        """Build the RetinaFace network and load weights from ``model_path``.

        Raises:
            AssertionError: if the path does not end in ``.h5``.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'tensorflow.keras model or weights must be a .h5 file.'

        #-------------------------------#
        #   Build the model and load the weights
        #-------------------------------#
        self.retinaface = RetinaFace(self.cfg, self.backbone)
        # Load from the expanded path so that '~/...' works; the original
        # validated the expanded path but loaded the raw one.
        self.retinaface.load_weights(model_path)
        print('{} model, anchors loaded.'.format(self.model_path))

    @tf.function
    def get_pred(self, photo):
        """Run the network on ``photo`` in inference mode and return its outputs."""
        preds = self.retinaface(photo, training=False)
        return preds

    #---------------------------------------------------#
    #   Detect faces in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect faces in ``image`` and return an annotated copy.

        Args:
            image: array-like image with three dimensions (height, width,
                channels); it must provide ``copy()``.

        Returns:
            The copy of the input with boxes, scores and five landmark dots
            drawn, or the untouched copy when nothing is detected.
        """
        #---------------------------------------------------#
        #   Keep a backup of the input; it is used for drawing.
        #---------------------------------------------------#
        old_image = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        #---------------------------------------------------#
        #   Scales that map the predicted coordinates back to the
        #   width/height of the original image: (w, h) pairs for the
        #   two box corners and for the five landmarks.
        #---------------------------------------------------#
        scale = [im_width, im_height, im_width, im_height]
        scale_for_landmarks = [im_width, im_height] * 5

        #---------------------------------------------------------#
        #   letterbox_image adds grey bars to the image for a
        #   distortion-free resize.
        #---------------------------------------------------------#
        if self.letterbox_image:
            image = letterbox_image(image,
                                    [self.input_shape[1], self.input_shape[0]])
        else:
            # Without letterboxing the network sees the raw size, so the
            # anchors have to match this particular image.
            self.anchors = Anchors(self.cfg,
                                   image_size=(im_height,
                                               im_width)).get_anchors()

        #-----------------------------------------------------------#
        #   Pre-process / normalise the picture.
        #-----------------------------------------------------------#
        photo = np.expand_dims(preprocess_input(image), 0)

        preds = self.get_pred(photo)
        preds = [pred.numpy() for pred in preds]
        #-----------------------------------------------------------#
        #   Decode the predictions.
        #-----------------------------------------------------------#
        results = self.bbox_util.detection_out(
            preds, self.anchors, confidence_threshold=self.confidence)

        #--------------------------------------#
        #   Nothing detected: return the original picture.
        #--------------------------------------#
        if len(results) <= 0:
            return old_image

        results = np.array(results)
        #---------------------------------------------------------#
        #   When letterbox_image was used, the grey-bar padding has
        #   to be removed from the coordinates.
        #---------------------------------------------------------#
        if self.letterbox_image:
            results = retinaface_correct_boxes(
                results, np.array([self.input_shape[0], self.input_shape[1]]),
                np.array([im_height, im_width]))

        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        # One colour per landmark, in the order the landmarks are stored.
        landmark_colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255),
                           (0, 255, 0), (255, 0, 0)]
        for b in results:
            # Format the score before the row is truncated to integers.
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            # b[0]-b[3] are the face-box coordinates, b[4] is the score.
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                          2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            print(b[0], b[1], b[2], b[3], b[4])
            # b[5]-b[14] are the (x, y) coordinates of the face landmarks.
            for k, color in enumerate(landmark_colors):
                cv2.circle(old_image, (b[5 + 2 * k], b[6 + 2 * k]), 1, color,
                           4)
        return old_image
import keras
import numpy as np
from keras.callbacks import (EarlyStopping, ModelCheckpoint,
                             ReduceLROnPlateau, TensorBoard)
from keras.optimizers import Adam

from nets.retinanet_training import Generator, focal, smooth_l1
from utils.anchors import get_anchors
from utils.utils import BBoxUtility

# NOTE(review): `retinanet` is referenced below but is not imported in the
# visible part of this script -- confirm where it comes from (presumably a
# module of the `nets` package).

if __name__ == "__main__":
    NUM_CLASSES = 20
    input_shape = (600, 600, 3)
    annotation_path = '2007_train.txt'

    inputs = keras.layers.Input(shape=input_shape)
    model = retinanet.resnet_retinanet(NUM_CLASSES, inputs)

    priors = get_anchors(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)

    #-------------------------------------------#
    #   See the README for where to download the weight file.
    #-------------------------------------------#
    model.load_weights("model_data/resnet50_coco_best_v2.1.0.h5",
                       by_name=True,
                       skip_mismatch=True)

    # 0.1 of the samples are used for validation, 0.9 for training.
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    # Shuffle with a fixed seed so that the order is reproducible, then
    # re-seed the generator from fresh entropy for everything that follows.
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
class Retinaface(object):
    """RetinaFace face detector that also returns a crop of a detected face.

    Settings come from ``_defaults`` and may be overridden per instance
    through keyword arguments to the constructor.
    """

    _defaults = {
        "model_path": 'model_data/retinaface_mobilenet025.h5',
        "backbone": "mobilenet",
        "confidence": 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or an error string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Load defaults, apply ``kwargs`` overrides and build the model.

        Args:
            **kwargs: per-instance overrides for entries of ``_defaults``.
                Previously these were accepted but silently ignored.
        """
        self.__dict__.update(self._defaults)
        # Overrides must land before the backbone/config selection below.
        for name, value in kwargs.items():
            setattr(self, name, value)

        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.bbox_util = BBoxUtility()
        self.generate()

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        """Build the RetinaFace network and load weights from ``model_path``.

        Raises:
            AssertionError: if the path does not end in ``.h5``.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'tensorflow.keras model or weights must be a .h5 file.'

        print('Loading weights into state dict...')
        # Build the model and load the weights.
        self.retinaface = RetinaFace(self.cfg, self.backbone)
        # Load from the expanded path so that '~/...' works; the original
        # validated the expanded path but loaded the raw one.
        self.retinaface.load_weights(model_path)
        print('{} model, anchors loaded.'.format(self.model_path))

    @tf.function
    def get_pred(self, photo):
        """Run the network on ``photo`` in inference mode and return its outputs."""
        preds = self.retinaface(photo, training=False)
        return preds

    #---------------------------------------------------#
    #   Detect faces in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect faces, annotate a copy of ``image`` and crop a face.

        Args:
            image: array-like image with three dimensions (height, width,
                channels); it must provide ``copy()``.

        Returns:
            ``(annotated_image, image_clip, face_count)``.  ``image_clip`` is
            the region of the last processed face with its channel order
            swapped via ``cv2.COLOR_RGB2BGR``; it is an empty array when that
            box lies completely outside the image.  When nothing is detected
            the result is ``(annotated_image, 0, 0)``.
        """
        old_image = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        # Scales that map the predicted coordinates back to pixel units:
        # (w, h) pairs for the two box corners and for the five landmarks.
        scale = [im_width, im_height, im_width, im_height]
        scale_for_landmarks = [im_width, im_height] * 5

        # Pre-process / normalise the picture.
        photo = np.expand_dims(preprocess_input(image), 0)
        anchors = Anchors(self.cfg,
                          image_size=(im_height, im_width)).get_anchors()

        preds = self.get_pred(photo)
        preds = [pred.numpy() for pred in preds]
        # Decode the predictions and apply non-maximum suppression.
        results = self.bbox_util.detection_out(
            preds, anchors, confidence_threshold=self.confidence)

        if len(results) <= 0:
            return old_image, 0, 0

        results = np.array(results)
        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        # One colour per landmark, in the order the landmarks are stored.
        landmark_colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255),
                           (0, 255, 0), (255, 0, 0)]
        for b in results:
            # Format the score before the row is truncated to integers.
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                          2)

            # The module-level timestamp is still refreshed for every face,
            # exactly as before (the code that consumed it is disabled).
            global cnt, t0, t1
            t1 = time.time()

            # Clamp the box to the image before slicing.  Boxes near the
            # border can have negative coordinates, which NumPy would treat
            # as "count from the end" and so yield a wrong or empty crop.
            x1, x2 = (min(max(v, 0), im_width) for v in (b[0], b[2]))
            y1, y2 = (min(max(v, 0), im_height) for v in (b[1], b[3]))
            image_clip = old_image[y1:y2, x1:x2]
            # cv2.cvtColor raises on an empty array, so convert only real
            # crops and hand back the empty array otherwise.
            if image_clip.size > 0:
                image_clip = cv2.cvtColor(image_clip, cv2.COLOR_RGB2BGR)

            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            # Landmarks: b[5]-b[14] hold five (x, y) pairs.
            for k, color in enumerate(landmark_colors):
                cv2.circle(old_image, (b[5 + 2 * k], b[6 + 2 * k]), 1, color,
                           4)
        return old_image, image_clip, len(results)
class SSD(object):
    """SSD object detector that draws labelled boxes onto images.

    Settings come from ``_defaults`` and may be overridden per instance
    through keyword arguments to the constructor.
    """

    _defaults = {
        "model_path": 'model_data/essay_mobilenet_ssd_weights.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "input_shape": (300, 300, 3),
        "confidence": 0.4,
        "nms_iou": 0.45,
        'anchors_size': [30, 60, 111, 162, 213, 264, 315],
        #---------------------------------------------------------------------#
        #   Controls whether letterbox_image is used for a distortion-free
        #   resize of the input.  After repeated testing, a plain resize with
        #   letterbox_image disabled was found to work better.
        #---------------------------------------------------------------------#
        "letterbox_image": False,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or an error string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise ssd
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Load defaults, apply ``kwargs`` overrides and build the model.

        Args:
            **kwargs: per-instance overrides for entries of ``_defaults``
                (e.g. ``confidence=0.6``).  Previously these were accepted
                but silently ignored.
        """
        self.__dict__.update(self._defaults)
        # Overrides must land before the class list and model are loaded.
        for name, value in kwargs.items():
            setattr(self, name, value)

        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.generate()
        self.bbox_util = BBoxUtility(self.num_classes, nms_thresh=self.nms_iou)

    #---------------------------------------------------#
    #   Read all class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as a list of names."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        """Build the SSD300 network, load its weights and pick box colours.

        Raises:
            AssertionError: if the path does not end in ``.h5``.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        #-------------------------------#
        #   Total number of classes: the listed names plus one extra index
        #   (labels are mapped back with ``- 1`` in detect_image).
        #-------------------------------#
        self.num_classes = len(self.class_names) + 1

        #-------------------------------#
        #   Build the model and load the weights
        #-------------------------------#
        self.ssd_model = ssd.SSD300(self.input_shape,
                                    self.num_classes,
                                    anchors_size=self.anchors_size)
        # Load from the expanded path so that '~/...' works; the original
        # validated the expanded path but loaded the raw one.
        self.ssd_model.load_weights(model_path, by_name=True)
        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a distinct box colour to every class: evenly spaced hues
        # converted from HSV to 0-255 RGB integer tuples.
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and draw the detections onto it.

        Args:
            image: input image; it must provide ``convert`` and be drawable
                through ``ImageDraw`` (presumably a PIL image -- TODO
                confirm against callers).

        Returns:
            The same ``image`` object, with boxes and labels drawn onto it
            when the decoder yields any detection.
        """
        image_shape = np.array(np.shape(image)[0:2])

        #---------------------------------------------------------#
        #   Either add grey bars for a distortion-free resize, or
        #   resize directly.
        #---------------------------------------------------------#
        if self.letterbox_image:
            crop_img = np.array(
                letterbox_image(image,
                                (self.input_shape[1], self.input_shape[0])))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize(
                (self.input_shape[1], self.input_shape[0]), Image.BICUBIC)

        photo = np.array(crop_img, dtype=np.float64)
        #-----------------------------------------------------------#
        #   Pre-process / normalise the picture.
        #-----------------------------------------------------------#
        photo = preprocess_input(
            np.reshape(photo,
                       [1, self.input_shape[0], self.input_shape[1], 3]))
        preds = self.ssd_model.predict(photo)

        #-----------------------------------------------------------#
        #   Decode the predictions.
        #-----------------------------------------------------------#
        results = self.bbox_util.detection_out(
            preds, confidence_threshold=self.confidence)

        #--------------------------------------#
        #   Nothing detected: return the original picture.
        #--------------------------------------#
        if len(results[0]) <= 0:
            return image

        #-----------------------------------------------------------#
        #   Keep only the boxes whose score reaches self.confidence.
        #   Columns of results[0] as consumed below: 0 is the label,
        #   1 the confidence, 2-5 are xmin, ymin, xmax, ymax.
        #-----------------------------------------------------------#
        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin, det_ymin, det_xmax, det_ymax = (results[0][:, 2],
                                                  results[0][:, 3],
                                                  results[0][:, 4],
                                                  results[0][:, 5])
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        #-----------------------------------------------------------#
        #   Remove the grey-bar padding, or simply scale the boxes by
        #   the image size when no letterboxing was applied.
        #-----------------------------------------------------------#
        if self.letterbox_image:
            boxes = ssd_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.input_shape[0], self.input_shape[1]]),
                image_shape)
        else:
            top_xmin = top_xmin * image_shape[1]
            top_ymin = top_ymin * image_shape[0]
            top_xmax = top_xmax * image_shape[1]
            top_ymax = top_ymax * image_shape[0]
            boxes = np.concatenate([top_ymin, top_xmin, top_ymax, top_xmax],
                                   axis=-1)

        # Font size and outline thickness are derived from the image size.
        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0],
            1)

        for i, c in enumerate(top_label_indices):
            # Labels are offset by one relative to class_names / colors.
            predicted_class = self.class_names[int(c) - 1]
            score = top_conf[i]

            # Each row of boxes is (top, left, bottom, right).  Grow the box
            # by 5 px on every side, round, and clamp to the image bounds.
            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            # Put the label above the box when it fits, otherwise just
            # inside the top edge.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # A thick outline is drawn as `thickness` nested 1-px rectangles.
            for offset in range(thickness):
                draw.rectangle([
                    left + offset, top + offset, right - offset,
                    bottom - offset
                ],
                               outline=self.colors[int(c) - 1])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c) - 1])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image

    def close_session(self):
        """Close the backend session obtained in the constructor."""
        self.sess.close()
class FRCNN(object):
    """Faster R-CNN object detector that draws labelled boxes onto images.

    Settings come from ``_defaults`` and may be overridden per instance
    through keyword arguments to the constructor.
    """

    _defaults = {
        "model_path": 'model_data/voc_weights.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
        "iou": 0.3
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or an error string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise faster RCNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Load defaults, apply ``kwargs`` overrides and build the models.

        Args:
            **kwargs: per-instance overrides for entries of ``_defaults``
                (e.g. ``confidence=0.7``).  Previously these were accepted
                but silently ignored.
        """
        self.__dict__.update(self._defaults)
        # Overrides must land before the class list and models are loaded.
        for name, value in kwargs.items():
            setattr(self, name, value)

        self.class_names = self._get_class()
        self.config = Config()
        self.generate()
        # NOTE(review): the "iou" setting is not used anywhere in this
        # class -- confirm whether BBoxUtility should receive it.
        self.bbox_util = BBoxUtility()

    #---------------------------------------------------#
    #   Read all class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the stripped lines of ``classes_path`` as a list of names."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Load the models
    #---------------------------------------------------#
    def generate(self):
        """Build the RPN and classifier models, load weights, pick colours.

        Raises:
            AssertionError: if the path does not end in ``.h5``.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        #-------------------------------#
        #   Total number of classes: the listed names plus one.
        #-------------------------------#
        self.num_classes = len(self.class_names) + 1

        #-------------------------------#
        #   Build the models and load the weights
        #-------------------------------#
        self.model_rpn, self.model_classifier = frcnn.get_predict_model(
            self.config, self.num_classes)
        # Load from the expanded path so that '~/...' works; the original
        # validated the expanded path but loaded the raw one.
        self.model_rpn.load_weights(model_path, by_name=True)
        self.model_classifier.load_weights(model_path, by_name=True)

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a distinct box colour to every class: evenly spaced hues
        # converted from HSV to 0-255 RGB integer tuples.
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Compute the size of the shared feature layer
    #---------------------------------------------------#
    def get_img_output_length(self, width, height):
        """Return the feature-map ``(width, height)`` for an input size.

        Each dimension goes through four stride-2 stages with kernel sizes
        7, 3, 1, 1 and paddings 3, 1, 0, 0:
        ``n -> (n + 2 * pad - kernel) // 2 + 1``.
        """
        def get_output_length(input_length):
            stride = 2
            for filter_size, pad in zip((7, 3, 1, 1), (3, 1, 0, 0)):
                input_length = (input_length + 2 * pad -
                                filter_size) // stride + 1
            return input_length

        return get_output_length(width), get_output_length(height)

    @tf.function(experimental_relax_shapes=True)
    def model_rpn_get_pred(self, photo):
        """Run the RPN model on ``photo`` in inference mode."""
        preds = self.model_rpn(photo, training=False)
        return preds

    @tf.function(experimental_relax_shapes=True)
    def model_classifier_get_pred(self, photo):
        """Run the classifier model on ``photo`` in inference mode."""
        preds = self.model_classifier(photo, training=False)
        return preds

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Run two-stage detection on ``image`` and return an annotated copy.

        Args:
            image: input image providing ``convert`` and ``resize``
                (presumably a PIL image -- TODO confirm against callers).

        Returns:
            A deep copy of the RGB-converted input, with boxes and labels
            drawn onto it when the classifier stage yields any detection.
        """
        #-------------------------------------#
        #   Convert to RGB so that greyscale pictures can be predicted too.
        #-------------------------------------#
        image = image.convert("RGB")

        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   Resize the original picture so that its shorter side is 600
        #   (per the original note; the size comes from get_new_img_size).
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)
        photo = np.array(image, dtype=np.float64)

        #-----------------------------------------------------------#
        #   Pre-process / normalise the picture.
        #-----------------------------------------------------------#
        photo = preprocess_input(np.expand_dims(photo, 0))
        rpn_pred = self.model_rpn_get_pred(photo)
        rpn_pred = [x.numpy() for x in rpn_pred]

        #-----------------------------------------------------------#
        #   Decode the predictions of the proposal network.
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(
            width, height)
        anchors = get_anchors([base_feature_width, base_feature_height],
                              width, height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   With the proposals and the shared feature layer available,
        #   feed both into the classifier.
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        # Drop the first column of every proposal, keeping the four
        # coordinates.
        proposal_box = np.array(rpn_results)[:, :, 1:]
        # The classifier takes the coordinates with each (x, y) pair swapped.
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier_get_pred(
            [base_layer, temp_ROIs])
        classifier_pred = [x.numpy() for x in classifier_pred]

        #-------------------------------------------------------------#
        #   Decode the proposals with the classifier output to obtain
        #   the final boxes.
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0]) == 0:
            return old_image

        # Columns as consumed below: 0-3 box, 4 confidence, 5 label.
        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]
        # Scale the box coordinates to the size of the original picture.
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        # NOTE(review): at this point `image` is still the resized picture,
        # so the font size follows the resized width although the drawing
        # happens on the original-size copy -- confirm this is intended.
        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
            1)

        # Everything below is drawn onto the original-size copy.
        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            # Each row of boxes is (left, top, right, bottom).  Grow the box
            # by 5 px on every side, round, and clamp to the image bounds.
            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            # Put the label above the box when it fits, otherwise just
            # inside the top edge.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # A thick outline is drawn as `thickness` nested 1-px rectangles.
            for offset in range(thickness):
                draw.rectangle([
                    left + offset, top + offset, right - offset,
                    bottom - offset
                ],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image