Example #1
    def image_test(self, path, inputs=None, oimg=None):
        bbox_util = BBoxUtility(2)
        if path is not None:

            img = cv2.imread(path)
            images = img.copy()

            img = cv2.resize(img, (self.input_shape[0], self.input_shape[1]))

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            inputs = image.img_to_array(img)

            inputs = preprocess_input(np.array([inputs]))
        else:
            images = oimg.copy()
        preds = self.model.predict(inputs, batch_size=1, verbose=1)
        results = bbox_util.detection_out(preds)
        print(results)
        if len(results) > 0:
            final = []
            for each in results[0]:

                if each[1] < 0.4: continue
                xmin = int(each[2] * np.shape(images)[1])
                ymin = int(each[3] * np.shape(images)[0])
                xmax = int(each[4] * np.shape(images)[1])
                ymax = int(each[5] * np.shape(images)[0])
                final.append([xmin, ymin, xmax, ymax, each[1]])
            return final

        return None
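
This fragment is a method of an SSD tester class (it reappears in full inside ssdT in Example #10). A hypothetical driver, assuming a loaded SSD-style Keras model whose decoded rows are [class, conf, xmin, ymin, xmax, ymax] in normalized coordinates (ssd_model and the image path are placeholders):

tester = ssdT(model=ssd_model, classes=['hand'], input_shape=(300, 300, 3))
boxes = tester.image_test('img/test.jpg')   # list of [xmin, ymin, xmax, ymax, conf], or None
if boxes:
    for xmin, ymin, xmax, ymax, conf in boxes:
        print('box', (xmin, ymin, xmax, ymax), 'conf %.2f' % conf)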
Example #2
class FRCNN(object):
    _defaults = {
        "model_path": 'model_data/voc_weights.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Faster R-CNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.config = Config()
        self.generate()
        self.bbox_util = BBoxUtility()

    #---------------------------------------------------#
    #   Get all the class names
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # Total number of classes (+1 for background)
        self.num_classes = len(self.class_names) + 1

        # Load the model; if the weight file already contains the model
        # structure, load it directly, otherwise build the model first
        self.model_rpn, self.model_classifier = frcnn.get_predict_model(
            self.config, self.num_classes)
        self.model_rpn.load_weights(self.model_path, by_name=True)
        self.model_classifier.load_weights(self.model_path,
                                           by_name=True,
                                           skip_mismatch=True)

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a different color to each class for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    def get_img_output_length(self, width, height):
        def get_output_length(input_length):
            # input_length += 6
            filter_sizes = [7, 3, 1, 1]
            padding = [3, 1, 0, 0]
            stride = 2
            for i in range(4):
                # input_length = (input_length - filter_size + stride) // stride
                input_length = (input_length + 2 * padding[i] -
                                filter_sizes[i]) // stride + 1
            return input_length

        return get_output_length(width), get_output_length(height)
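
    # Worked example of the formula above: a 600 px input passes through the
    # four strided layers as 600 -> 300 -> 150 -> 75 -> 38, i.e. roughly the
    # stride-16 feature map that config.rpn_stride assumes.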

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        width, height = get_new_img_size(old_width, old_height)

        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float64)

        # Image preprocessing and normalization
        photo = preprocess_input(np.expand_dims(photo, 0))
        preds = self.model_rpn.predict(photo)
        # Decode the predictions
        anchors = get_anchors(self.get_img_output_length(width, height), width,
                              height)

        rpn_results = self.bbox_util.detection_out(preds,
                                                   anchors,
                                                   1,
                                                   confidence_threshold=0)
        R = rpn_results[0][:, 2:]

        R[:, 0] = np.array(np.round(R[:, 0] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 1] = np.array(np.round(R[:, 1] * height / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 2] = np.array(np.round(R[:, 2] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 3] = np.array(np.round(R[:, 3] * height / self.config.rpn_stride),
                           dtype=np.int32)

        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]
        base_layer = preds[2]

        delete_line = []
        for i, r in enumerate(R):
            if r[2] < 1 or r[3] < 1:
                delete_line.append(i)
        R = np.delete(R, delete_line, axis=0)

        bboxes = []
        probs = []
        labels = []
        for jk in range(R.shape[0] // self.config.num_rois + 1):
            ROIs = np.expand_dims(R[self.config.num_rois *
                                    jk:self.config.num_rois * (jk + 1), :],
                                  axis=0)

            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // self.config.num_rois:
                # pad R up to num_rois by repeating the first ROI
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], self.config.num_rois,
                                curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier.predict([base_layer, ROIs])

            for ii in range(P_cls.shape[1]):
                if np.max(P_cls[0, ii, :-1]) < self.confidence:
                    continue

                label = np.argmax(P_cls[0, ii, :-1])

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :-1])

                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= self.config.classifier_regr_std[0]
                ty /= self.config.classifier_regr_std[1]
                tw /= self.config.classifier_regr_std[2]
                th /= self.config.classifier_regr_std[3]

                cx = x + w / 2.
                cy = y + h / 2.
                cx1 = tx * w + cx
                cy1 = ty * h + cy
                w1 = math.exp(tw) * w
                h1 = math.exp(th) * h

                x1 = cx1 - w1 / 2.
                y1 = cy1 - h1 / 2.

                x2 = cx1 + w1 / 2
                y2 = cy1 + h1 / 2

                x1 = int(round(x1))
                y1 = int(round(y1))
                x2 = int(round(x2))
                y2 = int(round(y2))

                bboxes.append([x1, y1, x2, y2])
                probs.append(np.max(P_cls[0, ii, :-1]))
                labels.append(label)

        if len(bboxes) == 0:
            return old_image

        # Keep only the boxes whose score is above confidence
        labels = np.array(labels)
        probs = np.array(probs)
        boxes = np.array(bboxes, dtype=np.float32)
        boxes[:, 0] = boxes[:, 0] * self.config.rpn_stride / width
        boxes[:, 1] = boxes[:, 1] * self.config.rpn_stride / height
        boxes[:, 2] = boxes[:, 2] * self.config.rpn_stride / width
        boxes[:, 3] = boxes[:, 3] * self.config.rpn_stride / height
        results = np.array(
            self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                       np.array(boxes), self.num_classes - 1,
                                       0.4))

        top_label_indices = results[:, 0]
        top_conf = results[:, 1]
        boxes = results[:, 2:]
        boxes[:, 0] = boxes[:, 0] * old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * old_width
        boxes[:, 3] = boxes[:, 3] * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2
        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image

    def close_session(self):
        self.sess.close()
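
A minimal usage sketch for this class, assuming PIL image I/O (the path is a placeholder):

frcnn = FRCNN()
image = Image.open('img/street.jpg')     # placeholder path
r_image = frcnn.detect_image(image)      # returns a PIL image with boxes drawn
r_image.show()
frcnn.close_session()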
Example #3
class SSD(object):
    _defaults = {
        "model_path":
        'logs/ep106-loss0.207-val_loss0.855.h5',  # change this to your own model path
        "classes_path": 'model_data/voc_classes.txt',
        "model_image_size": (300, 300, 3),
        "confidence": 0.4,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize SSD
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.sess = get_session()
        self.generate()
        self.bbox_util = BBoxUtility(self.num_classes)

    #---------------------------------------------------#
    #   Get all the class names
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # Total number of classes (+1 for background)
        self.num_classes = len(self.class_names) + 1

        # Load the model; if the file already contains the model structure,
        # load it directly, otherwise build the model first and load weights
        try:
            self.ssd_model = tf.keras.models.load_model(model_path,
                                                        compile=False)
        except Exception:
            self.ssd_model = ssd.SSD300(self.model_image_size,
                                        self.num_classes)
            self.ssd_model.load_weights(self.model_path, by_name=True)
        else:
            num_anchors = 8753
            assert self.ssd_model.layers[-1].output_shape[-1] == \
                num_anchors/len(self.ssd_model.output) * (self.num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'

        #self.ssd_model.summary()
        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a different color to each class for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        crop_img, x_offset, y_offset = letterbox_image(image, (300, 300))
        photo = np.array(crop_img, dtype=np.float64)
        self.predict_all = []
        # Image preprocessing and normalization
        photo = tf.keras.applications.imagenet_utils.preprocess_input(
            np.reshape(photo, [1, 300, 300, 3]))
        #self.ssd_model.summary()
        preds = self.ssd_model.predict(photo)

        # Decode the predictions
        results = self.bbox_util.detection_out(preds)
        if len(results[0]) <= 0:
            return image

        # Keep only the boxes whose score is above confidence
        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin = results[0][:, 2]
        det_ymin = results[0][:, 3]
        det_xmax = results[0][:, 4]
        det_ymax = results[0][:, 5]
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        boxes = ssd_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                  np.array([300, 300]), image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(image)[0] + np.shape(image)[1]) // 300

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c) - 1]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))
            self.result_ = '{} {} {} {} {} {}'.format(
                "".join(predicted_class.split(" ")), score, left, top, right,
                bottom)
            self.predict_all.append(self.result_)
            # Draw the box
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=255)
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=255)
            draw.text(text_origin, str(label, 'UTF-8'), fill=0, font=font)
            del draw
        return image
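
A hypothetical call against this class. Besides returning the drawn image, detect_image also appends one 'class score left top right bottom' string per detection to self.predict_all (the path is a placeholder):

ssd = SSD()
r_image = ssd.detect_image(Image.open('img/test.jpg'))   # placeholder path
for line in ssd.predict_all:
    print(line)   # e.g. 'person 0.87 24 31 200 340'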
Example #4
class EfficientDet(object):
    _defaults = {
        "model_path": 'model_data/efficientdet-d0-voc.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "phi": 0,
        "confidence": 0.4,
        "iou": 0.3,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize EfficientDet
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.model_image_size = [
            image_sizes[self.phi], image_sizes[self.phi], 3
        ]
        self.sess = K.get_session()
        self.generate()
        self.bbox_util = BBoxUtility(self.num_classes, nms_thresh=self.iou)
        self.prior = self._get_prior()

    #---------------------------------------------------#
    #   Get all the class names
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Get the prior (anchor) boxes
    #---------------------------------------------------#
    def _get_prior(self):
        data = get_anchors(image_sizes[self.phi])
        return data

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'
        #----------------------------------------#
        #   Number of classes
        #----------------------------------------#
        self.num_classes = len(self.class_names)

        #----------------------------------------#
        #   Build the EfficientDet model
        #----------------------------------------#
        self.Efficientdet = Efficientdet(self.phi, self.num_classes)
        self.Efficientdet.load_weights(self.model_path)

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a different color to each class for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        #---------------------------------------------------------#
        #   Convert the image to RGB here so that grayscale
        #   images do not crash prediction.
        #---------------------------------------------------------#
        image = image.convert('RGB')

        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Add gray bars to the image for a distortion-free resize
        #---------------------------------------------------------#
        crop_img = letterbox_image(
            image, [self.model_image_size[1], self.model_image_size[0]])

        #-----------------------------------------------------------#
        #   Preprocess and normalize; photo gets shape [1, 512, 512, 3]
        #-----------------------------------------------------------#
        photo = np.array(crop_img, dtype=np.float32)
        photo = np.reshape(preprocess_input(photo), [
            1, self.model_image_size[0], self.model_image_size[1],
            self.model_image_size[2]
        ])

        preds = self.Efficientdet.predict(photo)
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
        results = self.bbox_util.detection_out(
            preds, self.prior, confidence_threshold=self.confidence)

        #--------------------------------------#
        #   If nothing was detected, return the original image
        #--------------------------------------#
        if len(results[0]) <= 0:
            return image
        results = np.array(results)

        det_label = results[0][:, 5]
        det_conf = results[0][:, 4]
        det_xmin = results[0][:, 0]
        det_ymin = results[0][:, 1]
        det_xmax = results[0][:, 2]
        det_ymax = results[0][:, 3]
        #-----------------------------------------------------------#
        #   Keep only the boxes whose score is above confidence
        #-----------------------------------------------------------#
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        #-----------------------------------------------------------#
        #   Remove the letterbox gray-bar offsets
        #-----------------------------------------------------------#
        boxes = efficientdet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max((np.shape(image)[0] + np.shape(image)[1]) //
                        self.model_image_size[0], 1)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image

    def get_FPS(self, image, test_interval):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Add gray bars to the image for a distortion-free resize
        #---------------------------------------------------------#
        crop_img = letterbox_image(
            image, [self.model_image_size[1], self.model_image_size[0]])
        #-----------------------------------------------------------#
        #   Preprocess and normalize; photo gets shape [1, 512, 512, 3]
        #-----------------------------------------------------------#
        photo = np.array(crop_img, dtype=np.float32)
        photo = np.reshape(preprocess_input(photo), [
            1, self.model_image_size[0], self.model_image_size[1],
            self.model_image_size[2]
        ])

        preds = self.Efficientdet.predict(photo)
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
        results = self.bbox_util.detection_out(
            preds, self.prior, confidence_threshold=self.confidence)

        if len(results[0]) > 0:
            results = np.array(results)

            det_label = results[0][:, 5]
            det_conf = results[0][:, 4]
            det_xmin = results[0][:, 0]
            det_ymin = results[0][:, 1]
            det_xmax = results[0][:, 2]
            det_ymax = results[0][:, 3]
            #-----------------------------------------------------------#
            #   Keep only the boxes whose score is above confidence
            #-----------------------------------------------------------#
            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= self.confidence
            ]
            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = np.expand_dims(det_xmin[top_indices], -1)
            top_ymin = np.expand_dims(det_ymin[top_indices], -1)
            top_xmax = np.expand_dims(det_xmax[top_indices], -1)
            top_ymax = np.expand_dims(det_ymax[top_indices], -1)

            #-----------------------------------------------------------#
            #   Remove the letterbox gray-bar offsets
            #-----------------------------------------------------------#
            boxes = efficientdet_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.model_image_size[0], self.model_image_size[1]]),
                image_shape)

        t1 = time.time()
        for _ in range(test_interval):
            preds = self.Efficientdet.predict(photo)
            #-----------------------------------------------------------#
            #   Decode the predictions
            #-----------------------------------------------------------#
            results = self.bbox_util.detection_out(
                preds, self.prior, confidence_threshold=self.confidence)
            if len(results[0]) > 0:
                results = np.array(results)

                det_label = results[0][:, 5]
                det_conf = results[0][:, 4]
                det_xmin = results[0][:, 0]
                det_ymin = results[0][:, 1]
                det_xmax = results[0][:, 2]
                det_ymax = results[0][:, 3]
                #-----------------------------------------------------------#
                #   Keep only the boxes whose score is above confidence
                #-----------------------------------------------------------#
                top_indices = [
                    i for i, conf in enumerate(det_conf)
                    if conf >= self.confidence
                ]
                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = np.expand_dims(det_xmin[top_indices], -1)
                top_ymin = np.expand_dims(det_ymin[top_indices], -1)
                top_xmax = np.expand_dims(det_xmax[top_indices], -1)
                top_ymax = np.expand_dims(det_ymax[top_indices], -1)

                #-----------------------------------------------------------#
                #   Remove the letterbox gray-bar offsets
                #-----------------------------------------------------------#
                boxes = efficientdet_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array(
                        [self.model_image_size[0], self.model_image_size[1]]),
                    image_shape)

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time

    def close_session(self):
        self.sess.close()
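
A hypothetical detection and benchmark run, assuming PIL image I/O (the path and test interval are placeholders):

efficientdet = EfficientDet()
image = Image.open('img/test.jpg')       # placeholder path
r_image = efficientdet.detect_image(image)
tact_time = efficientdet.get_FPS(image, test_interval=100)
print('%.4f s per image, %.2f FPS' % (tact_time, 1.0 / tact_time))
efficientdet.close_session()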
Example #5
                        'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                    )

            X, Y, boxes = batch[0], batch[1], batch[2]

            loss_rpn = model_rpn.train_on_batch(X, Y)
            write_log(callback, ['rpn_cls_loss', 'rpn_reg_loss'], loss_rpn,
                      train_step)
            P_rpn = model_rpn.predict_on_batch(X)
            height, width, _ = np.shape(X[0])
            anchors = get_anchors(get_img_output_length(width, height), width,
                                  height)

            # 将预测结果进行解码
            results = bbox_util.detection_out(P_rpn,
                                              anchors,
                                              1,
                                              confidence_threshold=0)

            R = results[0][:, 2:]

            X2, Y1, Y2, IouS = calc_iou(R, config, boxes[0], width, height,
                                        NUM_CLASSES)

            if X2 is None:
                rpn_accuracy_rpn_monitor.append(0)
                rpn_accuracy_for_epoch.append(0)
                continue

            neg_samples = np.where(Y1[0, :, -1] == 1)
            pos_samples = np.where(Y1[0, :, -1] == 0)
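
write_log is not defined in this fragment; a minimal sketch of a compatible helper, assuming a TF 1.x Keras TensorBoard callback whose writer accepts scalar summaries:

import tensorflow as tf

def write_log(callback, names, logs, batch_no):
    # write one scalar summary per (name, value) pair at step batch_no
    for name, value in zip(names, logs):
        summary = tf.Summary(value=[tf.Summary.Value(tag=name,
                                                     simple_value=value)])
        callback.writer.add_summary(summary, batch_no)
        callback.writer.flush()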
Example #6
class SSD(object):
    _defaults = {
        "model_path": 'model_data/ssd_weights.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "model_image_size": (300, 300, 3),
        "confidence": 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize SSD
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.generate()
        self.bbox_util = BBoxUtility(self.num_classes)

    #---------------------------------------------------#
    #   Get all the class names
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # Total number of classes (+1 for background)
        self.num_classes = len(self.class_names) + 1

        # Load the model
        self.ssd_model = ssd.SSD300(self.model_image_size, self.num_classes)
        self.ssd_model.load_weights(self.model_path, by_name=True)

        self.ssd_model.summary()
        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a different color to each class for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        crop_img, x_offset, y_offset = letterbox_image(
            image, (self.model_image_size[0], self.model_image_size[1]))
        photo = np.array(crop_img, dtype=np.float64)

        # Image preprocessing and normalization
        photo = preprocess_input(
            np.reshape(
                photo,
                [1, self.model_image_size[0], self.model_image_size[1], 3]))
        preds = self.ssd_model.predict(photo)

        # Decode the predictions
        results = self.bbox_util.detection_out(
            preds, confidence_threshold=self.confidence)

        if len(results[0]) <= 0:
            return image

        # Keep only the boxes whose score is above confidence
        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin = results[0][:, 2]
        det_ymin = results[0][:, 3]
        det_xmax = results[0][:, 4]
        det_ymax = results[0][:, 5]
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        # Remove the letterbox gray bars
        boxes = ssd_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(image)[0] +
                     np.shape(image)[1]) // self.model_image_size[0]

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c) - 1]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c) - 1])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c) - 1])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image

    def close_session(self):
        self.sess.close()
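
A hypothetical webcam loop around detect_image, converting OpenCV's BGR frames into the RGB PIL images the class expects:

ssd = SSD()
capture = cv2.VideoCapture(0)
while True:
    ret, frame = capture.read()
    if not ret:
        break
    frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    frame = np.array(ssd.detect_image(frame))
    cv2.imshow('ssd', cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
capture.release()
ssd.close_session()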
Example #7
class Retinaface(object):
    _defaults = {
        "model_path": 'model_data/retinaface_mobilenet025.h5',
        "backbone": "mobilenet",
        "confidence": 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.bbox_util = BBoxUtility()
        self.generate()

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'tensorflow.keras model or weights must be a .h5 file.'

        print('Loading weights into state dict...')
        # Load the model
        self.retinaface = RetinaFace(self.cfg, self.backbone)
        self.retinaface.load_weights(self.model_path)
        print('{} model, anchors loaded.'.format(self.model_path))

    @tf.function
    def get_pred(self, photo):
        preds = self.retinaface(photo, training=False)
        return preds

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        old_image = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        scale = [im_width, im_height, im_width, im_height]
        scale_for_landmarks = [
            im_width, im_height, im_width, im_height, im_width, im_height,
            im_width, im_height, im_width, im_height
        ]

        # Image preprocessing and normalization
        photo = np.expand_dims(preprocess_input(image), 0)
        anchors = Anchors(self.cfg,
                          image_size=(im_height, im_width)).get_anchors()

        preds = self.get_pred(photo)
        preds = [pred.numpy() for pred in preds]

        # Decode the predictions and apply non-maximum suppression
        results = self.bbox_util.detection_out(
            preds, anchors, confidence_threshold=self.confidence)

        if len(results) <= 0:
            return old_image, 0, 0
        results = np.array(results)
        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        for b in results:
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                          2)

            #####################
            global cnt, t0, t1
            t1 = time.time()
            image_clip = old_image
            # if t1 - t0 > 1:
            #     t0 = t1
            # image_clip = old_image[b[1]-20:b[3]+20, b[0]-20:b[2]+20]
            image_clip = old_image[b[1]:b[3], b[0]:b[2]]
            image_clip = cv2.cvtColor(image_clip, cv2.COLOR_RGB2BGR)

            # Save the cropped image
            # cv2.imshow("clip", image_clip)
            # cv2.imwrite("savedImg/wang/" + str(t1) + ".png", image_clip)
            # cnt += 1
            # print(cnt)
            #####################

            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            # landms
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)

        return old_image, image_clip, len(results)
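
A hypothetical call; this variant expects an RGB numpy image and returns the drawn image, the last face crop, and the face count (the path is a placeholder):

retinaface = Retinaface()
img = cv2.cvtColor(cv2.imread('img/face.jpg'), cv2.COLOR_BGR2RGB)
r_image, face_clip, num_faces = retinaface.detect_image(img)
print('%d face(s) detected' % num_faces)
cv2.imshow('result', cv2.cvtColor(r_image, cv2.COLOR_RGB2BGR))
cv2.waitKey(0)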
Example #8
class SSD(object):
    _defaults = {
        "model_path": 'model_data/essay_mobilenet_ssd_weights.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "input_shape": (300, 300, 3),
        "confidence": 0.4,
        "nms_iou": 0.45,
        'anchors_size': [30, 60, 111, 162, 213, 264, 315],
        #---------------------------------------------------------------------#
        #   Controls whether letterbox_image is used for a distortion-free
        #   resize of the input; repeated testing found that a plain resize
        #   (letterbox disabled) works better here.
        #---------------------------------------------------------------------#
        "letterbox_image": False,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize SSD
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.generate()
        self.bbox_util = BBoxUtility(self.num_classes, nms_thresh=self.nms_iou)

    #---------------------------------------------------#
    #   Get all the class names
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        #-------------------------------#
        #   Total number of classes (+1 for background)
        #-------------------------------#
        self.num_classes = len(self.class_names) + 1

        #-------------------------------#
        #   Load the model and weights
        #-------------------------------#
        self.ssd_model = ssd.SSD300(self.input_shape,
                                    self.num_classes,
                                    anchors_size=self.anchors_size)
        self.ssd_model.load_weights(self.model_path, by_name=True)

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a different color to each class for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Add gray bars for a distortion-free resize,
        #   or simply resize the image directly for detection
        #---------------------------------------------------------#
        if self.letterbox_image:
            crop_img = np.array(
                letterbox_image(image,
                                (self.input_shape[1], self.input_shape[0])))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize(
                (self.input_shape[1], self.input_shape[0]), Image.BICUBIC)
        photo = np.array(crop_img, dtype=np.float64)
        #-----------------------------------------------------------#
        #   Image preprocessing and normalization
        #-----------------------------------------------------------#
        photo = preprocess_input(
            np.reshape(photo,
                       [1, self.input_shape[0], self.input_shape[1], 3]))
        preds = self.ssd_model.predict(photo)

        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
        results = self.bbox_util.detection_out(
            preds, confidence_threshold=self.confidence)

        #--------------------------------------#
        #   If nothing was detected, return the original image
        #--------------------------------------#
        if len(results[0]) <= 0:
            return image

        #-----------------------------------------------------------#
        #   Keep only the boxes whose score is above confidence
        #-----------------------------------------------------------#
        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin = results[0][:, 2]
        det_ymin = results[0][:, 3]
        det_xmax = results[0][:, 4]
        det_ymax = results[0][:, 5]
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        #-----------------------------------------------------------#
        #   Remove the letterbox gray-bar offsets
        #-----------------------------------------------------------#
        if self.letterbox_image:
            boxes = ssd_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.input_shape[0], self.input_shape[1]]),
                image_shape)
        else:
            top_xmin = top_xmin * image_shape[1]
            top_ymin = top_ymin * image_shape[0]
            top_xmax = top_xmax * image_shape[1]
            top_ymax = top_ymax * image_shape[0]
            boxes = np.concatenate([top_ymin, top_xmin, top_ymax, top_xmax],
                                   axis=-1)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0],
            1)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c) - 1]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c) - 1])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c) - 1])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image

    def close_session(self):
        self.sess.close()
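
letterbox_image is imported from a utility module in these examples; a minimal sketch of the usual implementation, assuming a PIL image and gray (128, 128, 128) padding (some of the examples above use a variant that also returns the x/y offsets):

def letterbox_image(image, size):
    # scale the image to fit inside `size` without distortion, pad with gray
    iw, ih = image.size
    w, h = size
    scale = min(w / iw, h / ih)
    nw, nh = int(iw * scale), int(ih * scale)
    resized = image.resize((nw, nh), Image.BICUBIC)
    new_image = Image.new('RGB', size, (128, 128, 128))
    new_image.paste(resized, ((w - nw) // 2, (h - nh) // 2))
    return new_image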
Example #9
class Retinaface(object):
    #-------------------------------#
    #   Make sure the backbone
    #   matches the pretrained
    #   weights, i.e. change
    #   model_path and backbone
    #   together
    #-------------------------------#
    _defaults = {
        "model_path": 'model_data/retinaface_mobilenet025.h5',
        "backbone": "mobilenet",
        "confidence": 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.bbox_util = BBoxUtility()
        self.generate()

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # Load the model
        self.retinaface = RetinaFace(self.cfg, self.backbone)
        self.retinaface.load_weights(self.model_path, by_name=True)

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        old_image = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        scale = [im_width, im_height, im_width, im_height]
        scale_for_landmarks = [
            im_width, im_height, im_width, im_height, im_width, im_height,
            im_width, im_height, im_width, im_height
        ]

        # Image preprocessing and normalization
        photo = np.expand_dims(preprocess_input(image), 0)
        anchors = Anchors(self.cfg,
                          image_size=(im_height, im_width)).get_anchors()

        preds = self.retinaface.predict(photo)
        # Decode the predictions and apply non-maximum suppression
        results = self.bbox_util.detection_out(
            preds, anchors, confidence_threshold=self.confidence)

        if len(results) <= 0:
            return old_image
        results = np.array(results)
        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        for b in results:
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                          2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            # landms
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)
        return old_image
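
For reference, the column layout of each decoded row follows from the scaling code above: four box coordinates, one confidence score, then five landmark (x, y) pairs. A hypothetical unpacking helper:

def unpack_result(b):
    box = b[0:4]                              # x1, y1, x2, y2 in pixels
    conf = b[4]                               # detection confidence
    landmarks = list(zip(b[5::2], b[6::2]))   # five (x, y) facial landmarks
    return box, conf, landmarks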
Example #10
class ssdT(object):
    def __init__(self, model, classes, input_shape):
        self.classes = classes
        self.num_class = len(classes) + 1
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_class)

    def image_test(self, path, inputs=None, oimg=None):
        bbox_util = BBoxUtility(2)
        if path is not None:

            img = cv2.imread(path)
            images = img.copy()

            img = cv2.resize(img, (self.input_shape[0], self.input_shape[1]))

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            inputs = image.img_to_array(img)

            inputs = preprocess_input(np.array([inputs]))
        else:
            images = oimg.copy()
        preds = self.model.predict(inputs, batch_size=1, verbose=1)
        results = bbox_util.detection_out(preds)
        print(results)
        if len(results) > 0:
            final = []
            for each in results[0]:

                if each[1] < 0.4: continue
                xmin = int(each[2] * np.shape(images)[1])
                ymin = int(each[3] * np.shape(images)[0])
                xmax = int(each[4] * np.shape(images)[1])
                ymax = int(each[5] * np.shape(images)[0])
                final.append([xmin, ymin, xmax, ymax, each[1]])
            return final

        return None

    def precision(self, test_path):
        data = testdata_load(test_path)
        gnum = 0
        rnum = 0
        for eachline in data:
            res = self.image_test(eachline[0])
            gtlist = []
            temp = []
            for i in range(len(eachline)):
                if i % 5 == 0: continue
                if i % 5 == 1 and i // 5 > 0:
                    gtlist.append(temp)
                    temp = []
                temp.append(int(eachline[i]))
            gtlist.append(temp)
            print(res)
            tnum, pgnum = self.cal_iou(res, gtlist)
            gnum += pgnum
            rnum += tnum
            print("precision:", float(rnum / gnum))

    def cal_iou(self, res, gt):
        if res is None:
            return 0, len(gt)

        tnum = 0
        for each in gt:
            gxmin = each[0]
            gymin = each[1]
            gxmax = each[2]
            gymax = each[3]
            for one in res:
                # clamp the intersection at zero so that disjoint boxes
                # do not produce a spurious positive overlap
                iw = max(0, np.min([gxmax, one[2]]) - np.max([gxmin, one[0]]))
                ih = max(0, np.min([gymax, one[3]]) - np.max([gymin, one[1]]))
                overlap = iw * ih
                ares = (one[2] - one[0]) * (one[3] - one[1])
                agt = (gxmax - gxmin) * (gymax - gymin)
                wholea = agt + ares - overlap
                ratio = overlap / wholea
                if ratio > 0.7: tnum += 1

        return tnum, len(gt)
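
    # Quick sanity check for cal_iou on synthetic boxes (hypothetical values):
    # an identical box gives IoU 1.0 and is counted, a disjoint box gives 0:
    #   gt  = [[0, 0, 100, 100]]
    #   res = [[0, 0, 100, 100, 0.9], [200, 200, 300, 300, 0.8]]
    #   self.cal_iou(res, gt)  ->  (1, 1)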

    def run(self,
            model_path,
            video_path=None,
            openposeJson=None,
            out_path=None,
            start_frame=0,
            conf_threshold=0.5,
            model2=None,
            model3=None):

        openpose_part = [
            "Nose", "Neck", "RShoulder", "RElbow", "RWrist", "LShoulder",
            "LElbow", "LWrist", "MidHip", "RHip", "RKnee", "RAnkle", "LHip",
            "LKnee", "LAnkle", "REye", "LEye", "REar", "LEar", "LBigToe",
            "LSmallToe", "LHeel", "RBigToe", "RSmallToe", "RHeel", "Background"
        ]

        fingertips = Fingertips(weights='model_data/finmodel.h5')
        if video_path is None: return None
        video = cv2.VideoCapture(video_path)

        timeline = []
        labelline = []
        handStatus = []
        if out_path:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(out_path,
                                  fourcc,
                                  10.0,
                                  (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
                                   int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))),
                                  isColor=True)

        vggmodel = load_model(model_path)
        if start_frame > 0:
            video.set(cv2.CAP_PROP_POS_MSEC, start_frame)

        accum_time = 0
        curr_fps = 0
        prev_time = timer()

        feature_params = dict(maxCorners=100,
                              qualityLevel=0.3,
                              minDistance=7,
                              blockSize=7)

        lk_params = dict(winSize=(15, 15),
                         maxLevel=2,
                         criteria=(cv2.TERM_CRITERIA_EPS
                                   | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

        color = np.random.randint(0, 255, (100, 3))
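        # Note: feature_params, lk_params and this random color table look like
        # a Shi-Tomasi / Lucas-Kanade optical-flow setup, but they are not used
        # below (color is reassigned before it is ever read).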
        num_frame = 0
        video_info = {}
        frame_info = []
        lastTime = 0
        while True:
            info, vimage = video.read()
            milliseconds = video.get(cv2.CAP_PROP_POS_MSEC)
            seconds = milliseconds / 1000

            video_info[str(seconds)] = []
            if not info:
                plt.figure(figsize=(100, 20))
                for i in range(len(labelline)):
                    if i == 0 or i == (len(labelline) - 1): continue
                    if labelline[i] != labelline[
                            i - 1] and labelline[i] != labelline[i + 1]:
                        labelline[i] = labelline[i - 1]

                for i in range(len(handStatus)):
                    if i == 0 or i == (len(handStatus) - 1): continue
                    if handStatus[i] != handStatus[
                            i - 1] and handStatus[i] != handStatus[i + 1]:
                        handStatus[i] = handStatus[i - 1]

                #newlabelline = []

                for i in range(len(labelline)):
                    temp = []
                    #if i - 3 >=0: temp.append(handStatus[i-3])
                    if i - 2 >= 0: temp.append(labelline[i - 2])
                    if i - 1 >= 0: temp.append(labelline[i - 1])
                    temp.append(labelline[i])
                    if i + 1 < len(labelline): temp.append(labelline[i + 1])
                    if i + 2 < len(labelline): temp.append(labelline[i + 2])
                    #if i + 3 < len(handStatus): temp.append(handStatus[i+3])
                    labelline[i] = Counter(temp).most_common(1)[0][0]

                for i in range(len(handStatus)):
                    temp = []
                    #if i - 3 >=0: temp.append(handStatus[i-3])
                    if i - 2 >= 0: temp.append(handStatus[i - 2])
                    if i - 1 >= 0: temp.append(handStatus[i - 1])
                    temp.append(handStatus[i])
                    if i + 1 < len(handStatus): temp.append(handStatus[i + 1])
                    if i + 2 < len(handStatus): temp.append(handStatus[i + 2])
                    #if i + 3 < len(handStatus): temp.append(handStatus[i+3])
                    handStatus[i] = Counter(temp).most_common(1)[0][0]

                #np.save("labelline.npy",labelline)
                plt.plot(timeline, labelline, label='hand exist', color='r')
                plt.plot(timeline, handStatus, label="hand status", color='b')
                finaltime = int(float(timeline[-1])) + 2
                plt.hlines("hand exist",
                           0,
                           finaltime,
                           color="green",
                           linestyles="dashed")
                plt.hlines("hand not exist",
                           0,
                           finaltime,
                           color="blue",
                           linestyles="dashed")
                plt.hlines("touch exist",
                           0,
                           finaltime,
                           color="red",
                           linestyles="dashed")
                plt.hlines("no touch exist",
                           0,
                           finaltime,
                           color="green",
                           linestyles="dashed")
                plt.text(finaltime,
                         "hand exist",
                         "hand detected at each time",
                         fontsize=10)
                plt.text(finaltime,
                         "hand not exist",
                         "hand not detected at each time",
                         fontsize=10)
                plt.text(finaltime,
                         "touch exist",
                         "hand detected and touch valid at each time",
                         fontsize=10)
                plt.text(
                    finaltime,
                    "no touch exist",
                    "no hand or no touch valid though hand detected at each time",
                    fontsize=10)
                plt.xlabel("time(ms)/per frame", fontsize=20)
                plt.ylabel(
                    "hand relative label(blue is touch validation label, red is hand detection label)",
                    fontsize=20)
                plt.legend()
                plt.savefig(video_path[:-4] + ".jpg")
                video.release()
                if out_path: out.release()
                cv2.destroyAllWindows()
                with open(video_path[:-4] + ".json", "a") as outfile:
                    json.dump(video_info, outfile, ensure_ascii=False)
                    outfile.write('\n')
                print("Over")
                return
            timeline.append(round(milliseconds, 2))
            input_size = (self.input_shape[0], self.input_shape[1])
            resized = cv2.resize(vimage, input_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

            inputs = image.img_to_array(rgb)
            input_image = preprocess_input(np.array([inputs]))

            res = [[]]
            #if type(res[0]) != list: res[0] = res[0].tolist()
            if openposeJson:
                #res = [[]]
                # sort so the JSON files line up with num_frame
                # (os.listdir order is arbitrary)
                video_file_name = sorted(os.listdir(openposeJson))
                body_info = json.load(
                    open(openposeJson + video_file_name[num_frame],
                         "r"))["people"]
                for h in range(len(body_info)):
                    for x in range(len(body_info[h]["pose_keypoints_2d"])):
                        if int(body_info[h]["pose_keypoints_2d"][4]) != 0:
                            if int(body_info[h]["pose_keypoints_2d"][25]) != 0:
                                distance = int(
                                    (body_info[h]["pose_keypoints_2d"][25] -
                                     body_info[h]["pose_keypoints_2d"][4]) / 2)
                            else:
                                distance = int(
                                    (np.shape(vimage)[0] -
                                     body_info[h]["pose_keypoints_2d"][4]) / 2)
                        else:
                            distance = 100

                        # x-indices 12 and 21: RWrist (4) and LWrist (7)
                        if x / 3 == 4 or x / 3 == 7:
                            tres = []
                            weightsum = 0
                            xpos = int(body_info[h]["pose_keypoints_2d"][x])
                            ypos = int(body_info[h]["pose_keypoints_2d"][x +
                                                                         1])
                            elxpos = int(body_info[h]["pose_keypoints_2d"][x -
                                                                           3])
                            elypos = int(body_info[h]["pose_keypoints_2d"][x -
                                                                           2])
                            if xpos == 0 and ypos == 0: continue

                            if elxpos >= xpos:
                                xmin = (
                                    xpos -
                                    distance) if (xpos - distance) > 0 else 0
                                xmax = (xpos + int(distance / 2)) if (
                                    xpos + int(distance / 2)) < np.shape(
                                        vimage)[1] else np.shape(vimage)[1]
                            else:
                                xmin = (xpos - int(distance / 2)) if (
                                    xpos - int(distance / 2)) > 0 else 0
                                xmax = (
                                    xpos +
                                    distance) if (xpos + distance) < np.shape(
                                        vimage)[1] else np.shape(vimage)[1]

                            if elypos >= ypos:
                                ymin = (
                                    ypos -
                                    distance) if (ypos - distance) > 0 else 0
                                ymax = (ypos + int(distance / 2)) if (
                                    ypos + int(distance / 2)) < np.shape(
                                        vimage)[0] else np.shape(vimage)[0]

                            else:
                                ymin = (ypos - int(distance / 2)) if (
                                    ypos - int(distance / 2)) > 0 else 0
                                ymax = (
                                    ypos +
                                    distance) if (ypos + distance) < np.shape(
                                        vimage)[0] else np.shape(vimage)[0]
                            print("distance is", distance, "box is",
                                  [xmin, ymin, xmax, ymax])
                            #cv2.rectangle(vimage,(xmin,ymin),(xmax,ymax),(255,0,0),1)
                            crop_image = vimage[ymin:ymax, xmin:xmax]
                            rgb_crop = cv2.cvtColor(
                                cv2.resize(crop_image, input_size),
                                cv2.COLOR_BGR2RGB)
                            input_crop = preprocess_input(
                                np.array([image.img_to_array(rgb_crop)]))
                            if model2 is None or model3 is None:
                                if len(res) > 0:
                                    res[0].append(
                                        self.bbox_util.detection_out(
                                            self.model.predict(input_crop))[0]
                                        [0])
                            else:
                                if len(
                                        combine(self, model2, model3, None,
                                                input_crop, crop_image)) > 0:
                                    #indexpro = np.array(combine(self,model2,model3, None, input_crop,crop_image))[:,1]
                                    #maxindex = np.where(indexpro == np.max(indexpro))[0][0]
                                    #each = combine(self,model2,model3, None, input_crop,crop_image)[maxindex]
                                    for each in combine(
                                            self, model2, model3, None,
                                            input_crop, crop_image):
                                        #print(each)
                                        if each[1] < conf_threshold: continue
                                        #weightsum += each[1]
                                        if each[2] <= 1 and each[
                                                3] <= 1 and each[
                                                    4] <= 1 and each[5] <= 1:
                                            each[2] = int(
                                                each[2] *
                                                np.shape(crop_image)[1]) + xmin
                                            each[3] = int(
                                                each[3] *
                                                np.shape(crop_image)[0]) + ymin
                                            each[4] = int(
                                                each[4] *
                                                np.shape(crop_image)[1]) + xmin
                                            each[5] = int(
                                                each[5] *
                                                np.shape(crop_image)[0]) + ymin
                                        else:
                                            each[2] = int(each[2]) + xmin
                                            each[3] = int(each[3]) + ymin
                                            each[4] = int(each[4]) + xmin
                                            each[5] = int(each[5]) + ymin

                                        res[0].append(each)
                                        print("res is", res)

                                        #tres.append(each)
                                    """    
                                    finalbox = [1,1,0,0,0,0]
                                    for each in tres:
                                        finalbox[2] = int(finalbox[2] + each[2] * each[1]/weightsum)
                                        finalbox[3] = int(finalbox[3] + each[3] * each[1]/weightsum)
                                        finalbox[4] = int(finalbox[4] + each[4] * each[1]/weightsum)
                                        finalbox[5] = int(finalbox[5] + each[5] * each[1]/weightsum)
                                    """

                            #print(xpos, ypos)
            if len(res[0]) == 0:
                if model2 is None or model3 is None:

                    pred = self.model.predict(input_image)

                    res = self.bbox_util.detection_out(pred)
                else:
                    #ssd ensemble learning
                    res = [
                        combine(self, model2, model3, None, input_image,
                                vimage)
                    ]

            if len(res) > 0 and len(res[0]) > 0:
                #labelline.append("hand exist")

                #deal with each frame
                temp = {}
                temp["hand"] = "exist"
                temp["hand status"] = []
                temp["body part"] = []
                temp["hand position"] = []
                for each in res[0]:

                    if each[1] < conf_threshold: continue
                    if each[2] <= 1 and each[3] <= 1 and each[4] <= 1 and each[
                            5] <= 1:
                        xmin = int(each[2] * np.shape(vimage)[1])
                        ymin = int(each[3] * np.shape(vimage)[0])
                        xmax = int(each[4] * np.shape(vimage)[1])
                        ymax = int(each[5] * np.shape(vimage)[0])
                    else:
                        xmin = int(each[2])
                        ymin = int(each[3])
                        xmax = int(each[4])
                        ymax = int(each[5])

                    test_img = vimage[ymin:ymax, xmin:xmax]

                    height, width, _ = test_img.shape

                    if height < 5 or width < 5:
                        finum = 0
                        continue

                    else:

                        temp["hand position"].append([xmin, ymin, xmax, ymax])
                        # gesture classification and fingertips regression
                        prob, pos = fingertips.classify(image=test_img)
                        pos = np.mean(pos, 0)

                        # post-processing
                        prob = np.asarray([(p >= 0.5) * 1.0 for p in prob])
                        for i in range(0, len(pos), 2):
                            pos[i] = pos[i] * width + xmin
                            pos[i + 1] = pos[i + 1] * height + ymin

                        # drawing
                        index = 0
                        color = [(15, 15, 240), (15, 240, 155), (240, 155, 15),
                                 (240, 15, 155), (240, 15, 240)]
                        #image = cv2.rectangle(image, (tl[0], tl[1]), (br[0], br[1]), (235, 26, 158), 2)
                        finum = 0
                        for c, p in enumerate(prob):
                            if p > 0.5:
                                finum += 1
                                vimage = cv2.circle(
                                    vimage,
                                    (int(pos[index]), int(pos[index + 1])),
                                    radius=12,
                                    color=color[c],
                                    thickness=-2)
                            index = index + 2

                    #edge post process
                    """
                    edges = edge(None,test_img)
                    edges = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
                    test_img = cv2.subtract(test_img, edges)
                    
                    
                    test_imgr90 = cv2.flip(cv2.transpose(test_img), 1)
                    test_imgl90 = cv2.flip(cv2.transpose(test_img), 0)
                    #test_imgr90 = cv2.flip(cv2.transpose(test_img), 1)
                    
                    test_imgr90 = cv2.resize(test_imgr90,(224,224))
                    test_imgl90 = cv2.resize(test_imgl90,(224,224))
                    
                    test_imgr90 = preprocess_input(test_imgr90)
                    test_imgl90 = preprocess_input(test_imgl90)
                    
                    
                    
                    
                    test_img = cv2.resize(test_img, (224,224))
                    test_img = preprocess_input(test_img)
                    #vgg submodel detection
                    ans1 = vggmodel.predict(test_img.reshape(1,224,224,3))
                    #ans2 = vggmodel.predict(test_imgr90.reshape(1,224,224,3))
                    #ans3 = vggmodel.predict(test_imgl90.reshape(1,224,224,3))
                    pos = [ans1[0][0]]
                    """

                    body_in = []
                    #for result in pos:
                    #    if result > 0.85: flag += 1
                    #print(flag)
                    cv2.rectangle(vimage, (xmin, ymin), (xmax, ymax),
                                  color=(255, 0, 0),
                                  thickness=2)
                    cv2.putText(vimage, "hand", (xmin, ymin - 3),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 255, 0), 1)
                    """
                    if flag == 0:
                        for result in pos:
                            result = result + 0.1 * (finum - 1)
                            if result > 0.7 and finum >= 2: flag += 1
                            if finum >= 3: flag += 1
                    """
                    flag = 0
                    if flag == 0:
                        vect1 = [xmin, ymin, xmax, ymax]
                        pastTrue = 0
                        #print(frame_info)
                        for framebefore in range(len(frame_info)):
                            if frame_info[len(frame_info) - 1 -
                                          framebefore][0] == lastTime:
                                t = frame_info[len(frame_info) - 1 -
                                               framebefore]
                                vect2 = t[3:]
                                vwidth = np.min([xmax, vect2[2]]) - np.max(
                                    [xmin, vect2[0]]) + 1
                                vheight = np.min([ymax, vect2[3]]) - np.max(
                                    [ymin, vect2[1]]) + 1

                                if vwidth < 0 or vheight < 0: continue
                                nsq = (ymax - ymin + 1) * (xmax - xmin + 1)
                                print("overlap fration:",
                                      vwidth * vheight / nsq)
                                if vwidth * vheight / nsq > 0.6:
                                    pastTrue += 1

                            elif frame_info[len(frame_info) - 1 -
                                            framebefore][0] < lastTime:
                                break

                        if pastTrue > 0 and finum >= 1:
                            flag += 1

                    #flag = 1

                    if openposeJson:

                        # sorted for the same reason as above: align the files
                        # with num_frame
                        video_file_name = sorted(os.listdir(openposeJson))
                        body_info = json.load(
                            open(openposeJson + video_file_name[num_frame],
                                 "r"))["people"]
                        for h in range(len(body_info)):
                            partsplit = {
                                "main body": [],
                                "left hand above": [],
                                "left hand below": [],
                                "right hand above": [],
                                "right hand below": [],
                                "left leg above": [],
                                "left leg below": [],
                                "right leg above": [],
                                "right leg below": [],
                                "head": []
                            }
                            detail = body_info[h]["pose_keypoints_2d"]
                            if detail[51] != 0 and detail[54] != 0 and detail[
                                    4] != 0:
                                xminpos = int(
                                    np.minimum(detail[54], detail[51])) - 5
                                yminpos = int(detail[52]) - 50
                                xmaxpos = int(
                                    np.maximum(detail[51], detail[54])) + 5
                                ymaxpos = int(detail[4])
                                partsplit["head"] = [
                                    xminpos, yminpos, xmaxpos, ymaxpos
                                ]

                            if detail[6] != 0 and detail[15] != 0:
                                xminpos = int(np.minimum(
                                    detail[15], detail[6]))
                                yminpos = int(np.minimum(
                                    detail[7], detail[16]))
                                xmaxpos = int(np.maximum(
                                    detail[6], detail[15]))
                                if detail[24] != 0:
                                    ymaxpos = int(detail[25])
                                else:
                                    ymaxpos = np.shape(vimage)[0]
                                partsplit["main body"] = [
                                    xminpos, yminpos, xmaxpos, ymaxpos
                                ]

                                if detail[9] != 0:
                                    xminpos = int(
                                        np.minimum(detail[6], detail[9]))
                                    yminpos = int(
                                        np.minimum(detail[7], detail[10]))
                                    xmaxpos = int(
                                        np.maximum(detail[6], detail[9]))
                                    ymaxpos = int(
                                        np.maximum(detail[7], detail[10]))
                                    partsplit["right hand above"] = [
                                        xminpos, yminpos, xmaxpos, ymaxpos
                                    ]

                                    if detail[12] != 0:
                                        xminpos = int(
                                            np.minimum(detail[12], detail[9]))
                                        yminpos = int(
                                            np.minimum(detail[13], detail[10]))
                                        xmaxpos = int(
                                            np.maximum(detail[12], detail[9]))
                                        ymaxpos = int(
                                            np.maximum(detail[13], detail[10]))
                                        partsplit["right hand below"] = [
                                            xminpos, yminpos, xmaxpos, ymaxpos
                                        ]

                                if detail[18] != 0:
                                    xminpos = int(
                                        np.minimum(detail[15], detail[18]))
                                    yminpos = int(
                                        np.minimum(detail[16], detail[19]))
                                    xmaxpos = int(
                                        np.maximum(detail[15], detail[18]))
                                    ymaxpos = int(
                                        np.maximum(detail[16], detail[19]))
                                    partsplit["left hand above"] = [
                                        xminpos, yminpos, xmaxpos, ymaxpos
                                    ]

                                    if detail[21] != 0:
                                        xminpos = int(
                                            np.minimum(detail[21], detail[18]))
                                        yminpos = int(
                                            np.minimum(detail[22], detail[19]))
                                        xmaxpos = int(
                                            np.maximum(detail[21], detail[18]))
                                        ymaxpos = int(
                                            np.maximum(detail[22], detail[19]))
                                        partsplit["left hand below"] = [
                                            xminpos, yminpos, xmaxpos, ymaxpos
                                        ]

                            if detail[27] != 0 and detail[30] != 0:
                                xminpos = int(
                                    np.minimum(detail[24], detail[30]))
                                yminpos = int(
                                    np.minimum(detail[28], detail[31]))
                                xmaxpos = int(
                                    np.maximum(detail[24], detail[30]))
                                ymaxpos = int(
                                    np.maximum(detail[28], detail[31]))
                                partsplit["right leg above"] = [
                                    xminpos, yminpos, xmaxpos, ymaxpos
                                ]

                                if detail[33] != 0:
                                    xminpos = int(
                                        np.minimum(detail[30], detail[33]))
                                    yminpos = int(
                                        np.minimum(detail[31], detail[34]))
                                    xmaxpos = int(
                                        np.maximum(detail[30], detail[33]))
                                    ymaxpos = int(
                                        np.maximum(detail[31], detail[34]))
                                    partsplit["right leg below"] = [
                                        xminpos, yminpos, xmaxpos, ymaxpos
                                    ]

                            if detail[36] != 0 and detail[39] != 0:
                                xminpos = int(
                                    np.minimum(detail[24], detail[39]))
                                yminpos = int(
                                    np.minimum(detail[37], detail[40]))
                                xmaxpos = int(
                                    np.maximum(detail[24], detail[39]))
                                ymaxpos = int(
                                    np.maximum(detail[37], detail[40]))
                                partsplit["left leg above"] = [
                                    xminpos, yminpos, xmaxpos, ymaxpos
                                ]

                                if detail[42] != 0:
                                    xminpos = int(
                                        np.minimum(detail[39], detail[42]))
                                    yminpos = int(
                                        np.minimum(detail[40], detail[43]))
                                    xmaxpos = int(
                                        np.maximum(detail[39], detail[42]))
                                    ymaxpos = int(
                                        np.maximum(detail[40], detail[43]))
                                    partsplit["left leg below"] = [
                                        xminpos, yminpos, xmaxpos, ymaxpos
                                    ]

                            for x in range(
                                    len(body_info[h]["pose_keypoints_2d"])):

                                # every keypoint x-coordinate except the wrists
                                if x % 3 == 0 and x / 3 != 4 and x / 3 != 7:
                                    xpos = int(
                                        body_info[h]["pose_keypoints_2d"][x])
                                    ypos = int(
                                        body_info[h]["pose_keypoints_2d"][x +
                                                                          1])
                                    #print(xpos, ypos)
                                    if (xpos >= xmin and xpos <= xmax) and (
                                            ypos >= ymin and ypos <= ymax):
                                        body_in.append(openpose_part[x // 3])

                            if True:
                                for keyname in partsplit.keys():
                                    if partsplit[keyname] != []:
                                        btemp = partsplit[keyname]
                                        #print(btemp)
                                        owidth = np.minimum(
                                            btemp[2], xmax) - np.maximum(
                                                xmin, btemp[0]) + 1
                                        oheight = np.minimum(
                                            btemp[3], ymax) - np.maximum(
                                                ymin, btemp[1]) + 1
                                        wholehand = (ymax - ymin +
                                                     1) * (xmax - xmin + 1)
                                        cv2.rectangle(vimage,
                                                      (btemp[0], btemp[1]),
                                                      (btemp[2], btemp[3]),
                                                      (0, 0, 255), 1)
                                        cv2.putText(
                                            vimage, keyname, (int(
                                                (btemp[2] + btemp[0]) / 2) - 1,
                                                              btemp[1] - 3),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                                            (0, 255, 255), 1)
                                        #if keyname == "main body":
                                        #    cv2.putText(vimage,keyname,(btemp[0], btemp[3] + 3), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,255,255), 1)
                                        #    print("main body is", btemp,"hand is",[xmin,ymin,xmax,ymax])
                                        if owidth < 0 or oheight < 0: continue
                                        oarea = owidth * oheight
                                        print("keyname is", keyname)
                                        print("flag is", flag)
                                        print("btemp is", btemp, "hand is",
                                              [xmin, ymin, xmax, ymax])
                                        print("fraction is:",
                                              oarea / wholehand)
                                        if oarea / wholehand > 0.2:
                                            body_in.append(keyname)
                                            #print("body",btemp,"hand",[xmin,ymin,xmax,ymax])

                            #print((res))
                            for i in range(len(res[0])):
                                if res[0][i][1] < conf_threshold: continue
                                for j in range(i + 1, len(res[0])):
                                    if res[0][j][1] < conf_threshold: continue
                                    temp1 = res[0][i]
                                    temp2 = res[0][j]
                                    width = np.min([
                                        int(temp1[4]),
                                        int(temp2[4])
                                    ]) - np.max([int(temp1[2]),
                                                 int(temp2[2])]) + 1
                                    height = np.min([
                                        int(temp1[5]),
                                        int(temp2[5])
                                    ]) - np.max([int(temp1[3]),
                                                 int(temp2[3])]) + 1
                                    if width < 0 or height < 0: continue
                                    area1 = (temp1[5] - temp1[3] +
                                             1) * (temp1[4] - temp1[2] + 1)
                                    area2 = (temp2[5] - temp2[3] +
                                             1) * (temp2[4] - temp2[2] + 1)
                                    overlap = width * height
                                    ratio = overlap / (area1 + area2 - overlap)
                                    if ratio > 0.6: body_in.append("hand")

                    print("body part is", body_in)
                    frame_info.append(
                        [milliseconds, flag, finum, xmin, ymin, xmax, ymax])
                    if flag > 0 and len(body_in) != 0:
                        cv2.putText(vimage, "touch", (xmax, ymin - 3),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                                    (0, 0, 255), 1)
                        temp["hand status"].append("touch")

                    else:
                        cv2.putText(vimage, "non - touch", (xmax, ymin - 3),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                                    (0, 0, 255), 1)
                        temp["hand status"].append("non - touch")

                    if temp["hand status"][-1] == "touch":
                        temp["body part"].append(body_in)
                    else:
                        temp["body part"].append([])

                if len(temp["hand status"]) == 0:
                    video_info[str(seconds)].append("hand not exist")
                    labelline.append("hand not exist")
                else:
                    video_info[str(seconds)].append(temp)
                    labelline.append("hand exist")

                if "touch" in temp["hand status"]:

                    handStatus.append("touch exist")
                else:

                    handStatus.append("no touch exist")

            else:
                video_info[str(seconds)].append("hand not exist")
                labelline.append("hand not exist")
                handStatus.append("no touch exist")

            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time += exec_time
            curr_fps = int(1 / exec_time)

            num_frame += 1
            lastTime = milliseconds
            #print(curr_time, res[0])
            fps = "FPS:" + str(curr_fps)
            curr_fps = 0
            cv2.rectangle(vimage, (0, 0), (50, 17), (255, 255, 255), -1)
            cv2.putText(vimage, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                        (0, 0, 0), 1)
            cv2.imshow("SSD result", vimage)
            if out_path: out.write(vimage)
            cv2.waitKey(1)
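
The end-of-video branch above smooths labelline and handStatus in two passes: isolated single-frame flips are first snapped to their neighbors, then each entry is replaced by the majority vote over a five-frame window. A minimal standalone sketch of that second pass (the function name is illustrative, not from the original):

from collections import Counter

def majority_smooth(labels, radius=2):
    # Replace each label by the most common label in a (2*radius+1)-wide
    # window, clipped at the ends of the sequence.
    smoothed = []
    for i in range(len(labels)):
        window = labels[max(0, i - radius):i + radius + 1]
        smoothed.append(Counter(window).most_common(1)[0][0])
    return smoothed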
Example #11
class Retinaface(object):
    _defaults = {
        "model_path": 'model_data/retinaface_mobilenet025.h5',
        "backbone": 'mobilenet',
        "confidence": 0.5,
        "nms_iou": 0.45,
        #----------------------------------------------------------------------#
        #   Whether to constrain the input image size.
        #   When enabled, the input image is resized to input_shape; otherwise
        #   the original image is used for prediction.
        #   The tf2 code has a small bug when the backbone is mobilenet: if the
        #   input width/height is not a multiple of 32, the detections are
        #   skewed; the resnet50 backbone does not have this problem.
        #   input_shape can be adjusted to the input size, but it must be a
        #   multiple of 32, e.g. [640, 640, 3].
        #----------------------------------------------------------------------#
        "input_shape": [1280, 1280, 3],
        "letterbox_image": True
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
        self.generate()
        self.anchors = Anchors(self.cfg,
                               image_size=(self.input_shape[0],
                                           self.input_shape[1])).get_anchors()

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'tensorflow.keras model or weights must be a .h5 file.'

        #-------------------------------#
        #   Load the model and weights
        #-------------------------------#
        self.retinaface = RetinaFace(self.cfg, self.backbone)
        self.retinaface.load_weights(self.model_path)
        print('{} model, anchors loaded.'.format(self.model_path))

    @tf.function
    def get_pred(self, photo):
        preds = self.retinaface(photo, training=False)
        return preds

    #---------------------------------------------------#
    #   Detect an image
    #---------------------------------------------------#
    def detect_image(self, image):
        #---------------------------------------------------#
        #   Keep a copy of the input image for drawing on later
        #---------------------------------------------------#
        old_image = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        #---------------------------------------------------#
        #   Compute the scale used to map the predicted boxes back to the
        #   width/height of the original image
        #---------------------------------------------------#
        scale = [
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0]
        ]
        scale_for_landmarks = [
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0]
        ]

        #---------------------------------------------------------#
        #   letterbox_image pads the image with gray bars for a distortion-free resize
        #---------------------------------------------------------#
        if self.letterbox_image:
            image = letterbox_image(image,
                                    [self.input_shape[1], self.input_shape[0]])
        else:
            self.anchors = Anchors(self.cfg,
                                   image_size=(im_height,
                                               im_width)).get_anchors()

        #-----------------------------------------------------------#
        #   Preprocess and normalize the image.
        #-----------------------------------------------------------#
        photo = np.expand_dims(preprocess_input(image), 0)

        preds = self.get_pred(photo)
        preds = [pred.numpy() for pred in preds]
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
        results = self.bbox_util.detection_out(
            preds, self.anchors, confidence_threshold=self.confidence)

        #--------------------------------------#
        #   If nothing was detected, return the original image
        #--------------------------------------#
        if len(results) <= 0:
            return old_image

        results = np.array(results)
        #---------------------------------------------------------#
        #   If letterbox_image was used, strip off the gray-bar padding.
        #---------------------------------------------------------#
        if self.letterbox_image:
            results = retinaface_correct_boxes(
                results, np.array([self.input_shape[0], self.input_shape[1]]),
                np.array([im_height, im_width]))

        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        for b in results:
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))

            # b[0]-b[3] are the face box coordinates, b[4] is the score
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                          2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            print(b[0], b[1], b[2], b[3], b[4])
            # b[5]-b[14] are the face landmark coordinates
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)
        return old_image
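
A minimal usage sketch for the class above (the file paths are assumptions, not from the original):

import cv2

retinaface = Retinaface()
img = cv2.imread("img/test.jpg")        # a BGR image, as the drawing code expects
result = retinaface.detect_image(img)
cv2.imwrite("img/test_out.jpg", result)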
Example #12
class detector(object):

    def __init__(self, weight_path=None):

        self.classes = config.CLASSES
        self.input_shape = config.IMAGE_SIZE
        self._load_weight(weight_path=weight_path)
        self.bbox_util = BBoxUtility(len(self.classes))
    
    def _load_weight(self, weight_path=None):
        
        weight_path = os.path.expanduser(weight_path)
        assert weight_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        # Load the weight file
        self.model = SSD300(config.IMAGE_SIZE, len(self.classes), anchors=config.ANCHORS_SIZE)
        self.model.load_weights(weight_path)

        # Set every class' color
        hsv_tuples = [(x / len(self.classes), 1., 1.) for x in range(len(self.classes))]
        self.colors = list(map(lambda x:colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(map(lambda x:(int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
    
    @tf.function
    def get_pred(self, photo):
        preds = self.model(photo, training=False)
        return preds
    
    # Detected Image
    def detect_image(self, image):

        image_shape = np.array(np.shape(image)[0:2])
        crop_image,x_offset,y_offset = letterbox_image(image, (self.input_shape[0], self.input_shape[1]))
        photo = np.array(crop_image, dtype=np.float64)

        # Normalization
        photo = preprocess_input(np.reshape(photo, [1, self.input_shape[0], self.input_shape[1], 3]))
        preds = self.get_pred(photo).numpy()

        # Decode
        results = self.bbox_util.detection_out(preds, confidence_threshold=config.CONFIDENCE)
        
        if len(results[0]) <= 0:
            return image
        
        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin, det_ymin, det_xmax, det_ymax = results[0][:,2], results[0][:, 3], results[0][:, 4], results[0][:, 5]
        top_indices = [i for i, conf in enumerate(det_conf) if conf >= config.CONFIDENCE]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], axis=-1)
        top_ymin = np.expand_dims(det_ymin[top_indices], axis=-1)
        top_xmax = np.expand_dims(det_xmax[top_indices], axis=-1)
        top_ymax = np.expand_dims(det_ymax[top_indices], axis=-1)

        boxes = ssd_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax, np.array((self.input_shape[0], self.input_shape[1])), image_shape)
        
        font = ImageFont.truetype(font='simhei.ttf',size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
        thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0]
        
        for i, c in enumerate(top_label_indices):

            predicted_class = self.classes[int(c) - 1]
            score = top_conf[i]

            ymin, xmin, ymax, xmax = boxes[i]
            # pad the box slightly on all sides (clamped to the image below)
            ymin = ymin - 5
            xmin = xmin - 5
            ymax = ymax + 5
            xmax = xmax + 5

            ymin = max(0, np.floor(ymin + 0.5).astype('int32'))
            xmin = max(0, np.floor(xmin + 0.5).astype('int32'))
            ymax = min(np.shape(image)[0], np.floor(ymax + 0.5).astype('int32'))
            xmax = min(np.shape(image)[1], np.floor(xmax + 0.5).astype('int32'))

            # draw Bounding box
            label = "{}:{:.2f}".format(predicted_class, score)
            print(label)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')

            if ymin - label_size[1] >= 0:
                text_origin = np.array((xmin, ymin - label_size[1]))
            else:
                text_origin = np.array((xmin, ymin + 1))
            for i in range(thickness):
                draw.rectangle(
                    [xmin + i, ymin + i, xmax - i, ymax - i],
                    outline=self.colors[int(c)-1])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[int(c)-1])
            draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
            del draw
        return image
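
A minimal usage sketch for the detector class above (the weight and image paths are assumptions; detect_image expects a PIL image, since it draws with ImageDraw):

from PIL import Image

det = detector(weight_path="model_data/ssd_weights.h5")  # assumed path
img = Image.open("img/test.jpg")
result = det.detect_image(img)
result.show()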
class Retinaface(object):
    _defaults = {
        "model_path": 'model_data/retinaface_mobilenet025.h5',
        "backbone": 'mobilenet',
        "confidence": 0.6,
        "nms_iou": 0.4,
        #----------------------------------------------------------------------#
        #   Whether to constrain the input image size.
        #   When enabled, the input image is resized to input_shape; otherwise
        #   the original image is used for prediction.
        #   The Keras code has a small bug when the backbone is mobilenet: if
        #   the input width/height is not a multiple of 32, the detections are
        #   skewed; the resnet50 backbone does not have this problem.
        #   input_shape can be adjusted to the input size, but it must be a
        #   multiple of 32, e.g. [640, 640, 3].
        #----------------------------------------------------------------------#
        "input_shape": [1280, 1280, 3],
        "letterbox_image": False
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
        self.generate()
        self.anchors = Anchors(self.cfg,
                               image_size=(self.input_shape[0],
                                           self.input_shape[1])).get_anchors()

    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        self.retinaface = RetinaFace(self.cfg, self.backbone)
        self.retinaface.load_weights(self.model_path, by_name=True)
        print('{} model, anchors, and classes loaded.'.format(model_path))

    def detect_image(self, image):
        old_image = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        scale = [
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0]
        ]
        scale_for_landmarks = [
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0]
        ]

        if self.letterbox_image:
            image = letterbox_image(image,
                                    [self.input_shape[1], self.input_shape[0]])
        else:
            self.anchors = Anchors(self.cfg,
                                   image_size=(im_height,
                                               im_width)).get_anchors()

        photo = np.expand_dims(preprocess_input(image), 0)

        preds = self.retinaface.predict(photo)
        results = self.bbox_util.detection_out(
            preds, self.anchors, confidence_threshold=self.confidence)

        if len(results) <= 0:
            return old_image, []

        results = np.array(results)
        if self.letterbox_image:
            results = retinaface_correct_boxes(
                results, np.array([self.input_shape[0], self.input_shape[1]]),
                np.array([im_height, im_width]))

        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        ans = []
        for b in results:
            confidence = b[4].astype(float)
            each_ans = {'box': [0, 0, 0, 0], 'confidence': 0, 'landmarks': []}
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))

            each_ans['box'][0] = b[0]
            each_ans['box'][1] = b[1]
            each_ans['box'][2] = b[2]
            each_ans['box'][3] = b[3]
            each_ans['confidence'] = confidence

            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                          2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            print(b[0], b[1], b[2], b[3], b[4])
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)
            landmarks = [
                (b[5], b[6]),
                (b[7], b[8]),
                (b[9], b[10]),
                (b[11], b[12]),
                (b[13], b[14]),
            ]
            each_ans['landmarks'] = landmarks
            ans.append(each_ans)
        return old_image, ans
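
For reference, a minimal sketch of consuming the structured result this variant returns alongside the drawn image (the image path is an assumption):

import cv2

model = Retinaface()
drawn, faces = model.detect_image(cv2.imread("img/face.jpg"))
for face in faces:
    # each entry carries a box, a confidence score and five landmark points
    print(face['box'], face['confidence'], face['landmarks'])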
Example #14
class FRCNN(object):
    _defaults = {
        "model_path": 'model_data/jia_v1.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.7,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Faster R-CNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.config = Config()
        self.generate()
        self.bbox_util = BBoxUtility()

    #---------------------------------------------------#
    #   Get all the classes
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Build the model and load the weights
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # Count the total number of classes (plus one for background)
        self.num_classes = len(self.class_names) + 1

        # Load the model; if the file already contains the model structure,
        # load it directly. Otherwise build the model first, then load weights.
        self.model_rpn, self.model_classifier = frcnn.get_predict_model(
            self.config, self.num_classes)
        self.model_rpn.load_weights(self.model_path, by_name=True)
        self.model_classifier.load_weights(self.model_path,
                                           by_name=True,
                                           skip_mismatch=True)

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a distinct color to each class for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    def get_img_output_length(self, width, height):
        def get_output_length(input_length):
            # input_length += 6
            filter_sizes = [7, 3, 1, 1]
            padding = [3, 1, 0, 0]
            stride = 2
            for i in range(4):
                # input_length = (input_length - filter_size + stride) // stride
                input_length = (input_length + 2 * padding[i] -
                                filter_sizes[i]) // stride + 1
            return input_length

        return get_output_length(width), get_output_length(height)
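
    # Worked example of the arithmetic above: each of the four stages applies
    #   output = (input + 2*padding - filter) // stride + 1
    # with stride 2, so for an input width of 600:
    #   (600 + 6 - 7) // 2 + 1 = 300
    #   (300 + 2 - 3) // 2 + 1 = 150
    #   (150 + 0 - 1) // 2 + 1 = 75
    #   (75  + 0 - 1) // 2 + 1 = 38
    # i.e. roughly the stride-16 downsampling of the backbone.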

    #---------------------------------------------------#
    #   检测图片
    #---------------------------------------------------#
    def detect_image(self, raman_data):

        old_raman = copy.deepcopy(raman_data)

        raman_data = np.array(list(map(float, raman_data)),
                              dtype=np.float32).reshape(-1, 1, 1)
        raman_shape = np.array(np.shape(raman_data)[0:2])
        old_width = raman_shape[0]
        old_height = raman_shape[1]

        raman = np.array(raman_data, dtype=np.float64)

        raman = (raman - (np.min(raman))) / (np.max(raman) - np.min(raman))
        raman = np.expand_dims(raman, 0)
        # raman shape = [1,1044,1,1]
        preds = self.model_rpn.predict(raman)
        # Decode the prediction results
        anchors = get_anchors((66, 1), old_width, old_height)
        # preds: the RPN output has three parts
        #   the first  (1,198,1)     is the objectness confidence
        #   the second (1,198,4)     holds the anchor-box adjustment parameters
        #   the third  (1,66,1,1024) is the feature map
        preds[1][..., 3] = 1
        anchors[:, 1] = 0
        rpn_results = self.bbox_util.detection_out(preds,
                                                   anchors,
                                                   1,
                                                   confidence_threshold=0)
        R = rpn_results[0][:, 2:]

        R[:,
          0] = np.array(np.round(R[:, 0] * old_width / self.config.rpn_stride),
                        dtype=np.int32)
        R[:, 1] = np.array(np.round(R[:, 1] * old_height), dtype=np.int32)
        R[:,
          2] = np.array(np.round(R[:, 2] * old_width / self.config.rpn_stride),
                        dtype=np.int32)
        R[:, 3] = np.array(np.round(R[:, 3] * old_height), dtype=np.int32)

        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]
        base_layer = preds[2]

        delete_line = []
        for i, r in enumerate(R):
            if r[2] < 1 or r[3] < 1:
                delete_line.append(i)
        R = np.delete(R, delete_line, axis=0)

        bboxes = []
        probs = []
        labels = []
        for jk in range(R.shape[0] // self.config.num_rois + 1):
            ROIs = np.expand_dims(R[self.config.num_rois *
                                    jk:self.config.num_rois * (jk + 1), :],
                                  axis=0)

            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // self.config.num_rois:
                #pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], self.config.num_rois,
                                curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier.predict([base_layer, ROIs])

            for ii in range(P_cls.shape[1]):
                if np.max(P_cls[0, ii, :-1]) < self.confidence:
                    continue

                label = np.argmax(P_cls[0, ii, :-1])

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = label  # same argmax as above

                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= self.config.classifier_regr_std[0]
                ty /= self.config.classifier_regr_std[1]
                tw /= self.config.classifier_regr_std[2]
                th /= self.config.classifier_regr_std[3]

                cx = x + w / 2.
                cy = y + h / 2.
                cx1 = tx * w + cx
                cy1 = ty * h + cy
                w1 = math.exp(tw) * w
                h1 = math.exp(th) * h
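                # Illustrative numbers (editor's note): with w = 10 and
                # tw = 0.2, the refined width is exp(0.2) * 10 ≈ 12.2, and
                # the centre shifts by tx * w along x.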

                x1 = cx1 - w1 / 2.
                y1 = cy1 - h1 / 2.

                x2 = cx1 + w1 / 2
                y2 = cy1 + h1 / 2

                x1 = int(round(x1))
                y1 = int(round(y1))
                x2 = int(round(x2))
                y2 = int(round(y2))

                bboxes.append([x1, y1, x2, y2])
                probs.append(np.max(P_cls[0, ii, :-1]))
                labels.append(label)

        if len(bboxes) == 0:
            print("None boxes")

            Raman_shift = Xexcel('./raman_data/raw_data/RamanShift.xlsx',
                                 'Sheet1')
            Normal_data = Yexcel(
                './raman_data/raw_data/yayin/no_origin_label0.xlsx',
                'no_origin_label0')  # Normal
            Normal_data = Ygetmean(Normal_data)

            Cancer_data = np.array(old_raman)
            Raman_shift = np.array(Raman_shift)
            Normal_data = np.array(Normal_data)

            # Clip the data to 350~4000 cm-1 #
            Lower_limit = np.max(np.where(Raman_shift < 350)) + 1
            Upper_limit = np.min(np.where(Raman_shift > 4000)) + 1

            Raman_shift_limit = Raman_shift[Lower_limit:Upper_limit]
            Cancer_data_limit = Cancer_data[Lower_limit:Upper_limit]
            Normal_data_limit = Normal_data[Lower_limit:Upper_limit]

            # Savitzky-Golay (SG) smoothing #
            Cancer_data_SG = sp.savgol_filter(Cancer_data_limit, 11, 2)
            Normal_data_SG = sp.savgol_filter(Normal_data_limit, 11, 2)

            # Baseline removal (arPLS) #
            roi = np.array([[350, 4000]])
            Cancer_data_final, Cancer_base_Intensity = rampy.baseline(
                Raman_shift_limit,
                Cancer_data_SG,
                roi,
                'arPLS',
                lam=10**6,
                ratio=0.001)
            Normal_data_final, Normal_base_Intensity = rampy.baseline(
                Raman_shift_limit,
                Normal_data_SG,
                roi,
                'arPLS',
                lam=10**6,
                ratio=0.001)

            plt.plot(Raman_shift_limit,
                     Normal_data_final,
                     ls="-",
                     lw=2,
                     c="c",
                     label="Normal")
            plt.plot(Raman_shift_limit,
                     Cancer_data_final,
                     ls="-",
                     lw=1,
                     c="b",
                     label="Cancer")

            plt.legend()
            plt.xlabel("yayin")
            # plt.savefig('./raman_data/raw_data/yayin/yayin_alter.jpg')
            plt.show()
            # Nothing to decode below; return before indexing the empty
            # boxes array.
            return

        # Keep the boxes whose score exceeds confidence
        labels = np.array(labels)
        probs = np.array(probs)
        boxes = np.array(bboxes, dtype=np.float32)
        boxes[:, 0] = boxes[:, 0] * self.config.rpn_stride / old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * self.config.rpn_stride / old_width
        boxes[:, 3] = boxes[:, 3] * old_height
        results = np.array(
            self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                       np.array(boxes), self.num_classes - 1,
                                       0.4))

        top_label_indices = results[:, 0]
        top_conf = results[:, 1]
        boxes = results[:, 2:]
        boxes[:, 0] = boxes[:, 0] * old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * old_width
        boxes[:, 3] = boxes[:, 3] * old_height

        # Plot the base figure
        Raman_shift = Xexcel('./raman_data/raw_data/RamanShift.xlsx', 'Sheet1')
        Normal_data = Yexcel(
            './raman_data/raw_data/yayin/no_origin_label0.xlsx',
            'no_origin_label0')  # Normal
        Normal_data = Ygetmean(Normal_data)

        Cancer_data = np.array(old_raman)
        Raman_shift = np.array(Raman_shift)
        Normal_data = np.array(Normal_data)

        # Clip the data to 350~4000 cm-1 #
        Lower_limit = np.max(np.where(Raman_shift < 350)) + 1
        Upper_limit = np.min(np.where(Raman_shift > 4000)) + 1

        Raman_shift_limit = Raman_shift[Lower_limit:Upper_limit]
        Cancer_data_limit = Cancer_data[Lower_limit:Upper_limit]
        Normal_data_limit = Normal_data[Lower_limit:Upper_limit]

        # Savitzky-Golay (SG) smoothing #
        Cancer_data_SG = sp.savgol_filter(Cancer_data_limit, 11, 2)
        Normal_data_SG = sp.savgol_filter(Normal_data_limit, 11, 2)

        # Baseline removal (arPLS) #
        roi = np.array([[350, 4000]])
        Cancer_data_final, Cancer_base_Intensity = rampy.baseline(
            Raman_shift_limit,
            Cancer_data_SG,
            roi,
            'arPLS',
            lam=10**6,
            ratio=0.001)
        Normal_data_final, Normal_base_Intensity = rampy.baseline(
            Raman_shift_limit,
            Normal_data_SG,
            roi,
            'arPLS',
            lam=10**6,
            ratio=0.001)

        plt.plot(Raman_shift_limit,
                 Normal_data_final,
                 ls="-",
                 lw=2,
                 c="c",
                 label="Normal")
        plt.plot(Raman_shift_limit,
                 Cancer_data_final,
                 ls="-",
                 lw=1,
                 c="b",
                 label="Cancer")

        plt.legend()
        plt.xlabel("yayin")

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]

            # left = max(1, np.floor(left + 0.5).astype('int32'))
            # right = min(1043, np.floor(right + 0.5).astype('int32'))

            # Scale box indices to the wavenumber axis (presumably ~4 cm-1
            # per sample point).
            left = max(-30, np.floor(left - 0.5).astype('int32') * 4)
            right = min(4080, np.floor(right - 0.5).astype('int32') * 4)

            label = '{} {:.2f}'.format(predicted_class, score)
            label = label.encode('utf-8')

            # print(label ,"  ", "[", left , ", " , right , "]", " ", "[",  X[left-1], ",", X[right-1], "]")
            # plt.axvspan(xmin=X[left-1], xmax=X[right-1], facecolor='y', alpha=0.3)

            print(label, "  ", "[", left, ", ", right, "]")
            plt.axvspan(xmin=left, xmax=right, facecolor='y', alpha=0.3)

        plt.show()

    def close_session(self):
        self.sess.close()
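
The plotting branches of detect_image double as a compact spectral preprocessing recipe: Savitzky-Golay smoothing followed by arPLS baseline removal. Below is a minimal standalone sketch of that pipeline; the spectrum, shift axis, and peak positions are invented for illustration, while the filter window/order and baseline parameters match the code above.

import numpy as np
import scipy.signal as sp
import rampy

# Synthetic spectrum: two Gaussian peaks on a sloping baseline plus noise
shift = np.linspace(350, 4000, 1044)
peaks = np.exp(-(shift - 1000) ** 2 / 800) + 0.5 * np.exp(-(shift - 2900) ** 2 / 2000)
spectrum = peaks + 1e-4 * shift + 0.01 * np.random.randn(shift.size)

# Savitzky-Golay smoothing with the same window/order as detect_image
smoothed = sp.savgol_filter(spectrum, 11, 2)

# arPLS baseline estimation and removal with the same parameters as above
roi = np.array([[350, 4000]])
corrected, baseline = rampy.baseline(shift, smoothed, roi, 'arPLS',
                                     lam=10 ** 6, ratio=0.001)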
Example #15
0
class myFRCNN_img_retrieve(object):
    _defaults = {
        "model_path": respath.DEFAULT_WEIGHT_FILE,
        "classes_path": respath.DEFAULT_CLASSES_FILE,
        "confidence": 0.01,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialize Faster R-CNN
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.config = Config()
        self.generate()
        self.bbox_util = BBoxUtility()

    # ---------------------------------------------------#
    #   Get all class names
    # ---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    # ---------------------------------------------------#
    #   Load the model
    # ---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # Total number of classes (len(class_names) + 1 for background)
        self.num_classes = len(self.class_names) + 1

        # Load the model: if the file already contains the architecture, it
        # is loaded directly; otherwise the model is built first and the
        # weights are then loaded.
        self.model_rpn, self.model_classifier = myfrcnn_retrieve.get_predict_model(
            self.config, self.num_classes)
        self.model_rpn.load_weights(self.model_path, by_name=True)
        self.model_classifier.load_weights(self.model_path,
                                           by_name=True,
                                           skip_mismatch=True)

        self.mode_ROIout = myfrcnn_retrieve.get_ROIout_model(
            self.config, self.num_classes)
        self.mode_ROIout.load_weights(self.model_path,
                                      by_name=True,
                                      skip_mismatch=True)

        self.featuremap_model = myfrcnn_retrieve.get_featuremap_model(
            self.config, self.num_classes)
        self.featuremap_model.load_weights(self.model_path,
                                           by_name=True,
                                           skip_mismatch=True)

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a distinct color to each class for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    def get_img_output_length(self, width, height):
        def get_output_length(input_length):
            # input_length += 6
            filter_sizes = [7, 3, 1, 1]
            padding = [3, 1, 0, 0]
            stride = 2
            for i in range(4):
                # conv output length: (n + 2*padding - filter) // stride + 1
                input_length = (input_length + 2 * padding[i] -
                                filter_sizes[i]) // stride + 1
            return input_length

        return get_output_length(width), get_output_length(height)
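
    # Worked example (editor's sketch): for the 416x416 inputs assumed by
    # extract_feature() below,
    #   416 -> (416 + 6 - 7)//2 + 1 = 208
    #   208 -> (208 + 2 - 3)//2 + 1 = 104
    #   104 -> (104 + 0 - 1)//2 + 1 =  52
    #    52 -> ( 52 + 0 - 1)//2 + 1 =  26
    # i.e. a 26x26 feature map, matching the (1, 26, 26, 1024) shape noted in
    # the commented-out variant of extract_feature().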

    # ---------------------------------------------------#
    #   Extract features
    # ---------------------------------------------------#
    def extract_feature(self, image):
        #        image_shape = np.array(np.shape(image)[0:2])
        #        old_width = image_shape[1]  # original image width and height
        #        old_height = image_shape[0]
        #        old_image = copy.deepcopy(image)
        #        width,height = get_new_img_size(old_width,old_height, img_min_side=416)
        #
        #        image.resize([width,height])
        image = np.array(image, dtype=np.float64)  # 416*416
        assert image.shape == (416, 416)
        photo = np.stack((image, image, image), axis=2)  # (416,416,3)
        photo = np.expand_dims(photo, 0)
        photo = photo / np.max(np.abs(photo))  # scale by the largest absolute value
        myfeature_map = self.featuremap_model.predict(photo)
        return myfeature_map

        # image = np.array(image, dtype=np.float64)  # 416*416
        # assert image.shape == (416, 416)
        # photo = np.stack((image, image, image), axis=2)  # (416,416,3)
        #
        # photo = preprocess_input(np.expand_dims(photo, 0), mode='tf')  # (1,416,416,3)
        # #        preds = self.model_rpn.predict(photo)  # outputs: coarse scores (1,6048,1), coarse box adjustments (1,6048,4), feature map (1,26,26,1024)
        # #        featuremap = preds[2] #featuremap
        #
        # myfeature_map = self.featuremap_model.predict(photo)
        # return myfeature_map

    # ---------------------------------------------------#
    #   Detect image
    # ---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]  # original image width and height
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        width, height = get_new_img_size(old_width,
                                         old_height,
                                         img_min_side=416)

        image.resize([width, height])
        photo = np.array(image, dtype=np.float64)
        # Grayscale inputs lack a channel axis; replicate to three channels.
        if photo.ndim == 2:
            photo = np.stack((photo, photo, photo), axis=2)

        # Preprocess the image (normalization)
        photo = preprocess_input(np.expand_dims(photo, 0))
        preds = self.model_rpn.predict(photo)  # coarse scores, coarse box adjustments, feature map
        # Decode the predictions
        anchors = get_anchors(self.get_img_output_length(width, height), width,
                              height)
        # rpn_results[0] rows: [label, conf, x1, y1, x2, y2]
        rpn_results = self.bbox_util.detection_out(
            preds, anchors, 1,
            confidence_threshold=0)  # no class decision yet, only object vs. background
        R = rpn_results[0][:, 2:]  # many proposals, passed to the classifier in batches

        R[:, 0] = np.array(np.round(R[:, 0] * width / self.config.rpn_stride),
                           dtype=np.int32)  # map to feature-map coordinates
        R[:, 1] = np.array(np.round(R[:, 1] * height / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 2] = np.array(np.round(R[:, 2] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 3] = np.array(np.round(R[:, 3] * height / self.config.rpn_stride),
                           dtype=np.int32)
        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]
        base_layer = preds[2]  # featuremap

        delete_line = []
        for i, r in enumerate(R):
            if r[2] < 1 or r[3] < 1:
                delete_line.append(i)
        R = np.delete(R, delete_line, axis=0)

        bboxes = []
        probs = []
        labels = []
        for jk in range(R.shape[0] // self.config.num_rois + 1):
            ROIs = np.expand_dims(R[self.config.num_rois *
                                    jk:self.config.num_rois * (jk + 1), :],
                                  axis=0)
            # one batch of proposals
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // self.config.num_rois:
                # pad R: fill out the final, partial batch by repeating the first ROI
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], self.config.num_rois,
                                curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier.predict(
                [base_layer, ROIs])  # class scores and box-refinement parameters

            for ii in range(P_cls.shape[1]):
                if np.max(P_cls[0, ii, :-1]) < self.confidence:
                    continue

                label = np.argmax(P_cls[0, ii, :-1])

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = label  # same argmax as above

                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= self.config.classifier_regr_std[0]  # undo the regression-target scaling
                ty /= self.config.classifier_regr_std[1]
                tw /= self.config.classifier_regr_std[2]
                th /= self.config.classifier_regr_std[3]

                cx = x + w / 2.  # proposal centre
                cy = y + h / 2.
                cx1 = tx * w + cx  # refined centre
                cy1 = ty * h + cy
                w1 = math.exp(tw) * w
                h1 = math.exp(th) * h

                x1 = cx1 - w1 / 2.  # top-left and bottom-right of the refined box
                y1 = cy1 - h1 / 2.

                x2 = cx1 + w1 / 2
                y2 = cy1 + h1 / 2

                x1 = int(round(x1))
                y1 = int(round(y1))
                x2 = int(round(x2))
                y2 = int(round(y2))

                bboxes.append([x1, y1, x2, y2])
                probs.append(np.max(P_cls[0, ii, :-1]))
                labels.append(label)

        if len(bboxes) == 0:
            return old_image

        # Keep the boxes whose score exceeds confidence
        labels = np.array(labels)
        probs = np.array(probs)
        boxes = np.array(bboxes, dtype=np.float32)
        boxes[:, 0] = boxes[:, 0] * self.config.rpn_stride / width
        boxes[:, 1] = boxes[:, 1] * self.config.rpn_stride / height
        boxes[:, 2] = boxes[:, 2] * self.config.rpn_stride / width
        boxes[:, 3] = boxes[:, 3] * self.config.rpn_stride / height
        results = np.array(
            self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                       np.array(boxes), self.num_classes - 1,
                                       0.4))

        top_label_indices = results[:, 0]
        top_conf = results[:, 1]
        boxes = results[:, 2:]
        boxes[:, 0] = boxes[:, 0] * old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * old_width
        boxes[:, 3] = boxes[:, 3] * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2
        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box and its label
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for t in range(thickness):  # t, not i: avoid shadowing the outer loop index
                draw.rectangle(
                    [left + t, top + t, right - t, bottom - t],
                    # outline=self.colors[int(c)])
                    outline=None)
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                # fill=self.colors[int(c)])
                fill=None)
            try:
                # single-channel fill for grayscale images
                draw.text(text_origin,
                          str(label, 'UTF-8'),
                          fill=(250, ),
                          font=font)
            except Exception:
                # fall back to an RGB fill
                draw.text(text_origin,
                          str(label, 'UTF-8'),
                          fill=(250, 250, 250),
                          font=font)
            del draw
        return image

    def close_session(self):
        self.sess.close()
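
For context, a minimal retrieval sketch built on extract_feature. The image paths and the helper name are hypothetical, and flatten-plus-cosine is just one plausible way to compare the returned feature maps; the only contract taken from the class itself is the (416, 416) single-channel input it asserts.

import numpy as np
from PIL import Image

frcnn = myFRCNN_img_retrieve()

def load_416_gray(path):
    # hypothetical helper: extract_feature() asserts a (416, 416) array
    return np.array(Image.open(path).convert('L').resize((416, 416)))

query = frcnn.extract_feature(load_416_gray('query.jpg')).flatten()
candidate = frcnn.extract_feature(load_416_gray('candidate.jpg')).flatten()

# Cosine similarity between the flattened feature maps
similarity = np.dot(query, candidate) / (
    np.linalg.norm(query) * np.linalg.norm(candidate))
print('cosine similarity: {:.4f}'.format(similarity))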