示例#1
0
    def detect_image(self, image_id, image):
        self.confidence = 0.01
        self.iou = 0.45
        f = open("./input/detection-results/" + image_id + ".txt", "w")

        with torch.no_grad():
            image_shape = np.array(np.shape(image)[0:2])
            old_width = image_shape[1]
            old_height = image_shape[0]
            width, height = get_new_img_size(old_width, old_height)

            image = image.resize([width, height], Image.BICUBIC)
            photo = np.array(image, dtype=np.float32) / 255
            photo = np.transpose(photo, (2, 0, 1))

            images = []
            images.append(photo)
            images = np.asarray(images)
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()

            roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs,
                                        roi_scores,
                                        rois,
                                        height=height,
                                        width=width,
                                        nms_iou=self.iou,
                                        score_thresh=self.confidence)
            if len(outputs) == 0:
                return
            bbox = outputs[:, :4]
            conf = outputs[:, 4]
            label = outputs[:, 5]

            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
            bbox = np.array(bbox, np.int32)

        for i, c in enumerate(label):
            predicted_class = self.class_names[int(c)]
            score = str(conf[i])

            left, top, right, bottom = bbox[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
    def detect_image(self, image_id, image):
        self.confidence = 0.01
        f = open("./input/detection-results/" + image_id + ".txt", "w")

        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   给原图像进行resize,resize到短边为600的大小上
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)
        photo = np.array(image, dtype=np.float64)

        #-----------------------------------------------------------#
        #   图片预处理,归一化。
        #-----------------------------------------------------------#
        photo = preprocess_input(np.expand_dims(photo, 0))
        rpn_pred = self.model_rpn.predict(photo)

        #-----------------------------------------------------------#
        #   将建议框网络的预测结果进行解码
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(
            width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width,
                              height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   在获得建议框和共享特征层后,将二者传入classifier中进行预测
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict(
            [base_layer, temp_ROIs])

        #-------------------------------------------------------------#
        #   利用classifier的预测结果对建议框进行解码,获得预测框
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0]) == 0:
            return old_image

        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = str(top_conf[i])

            left, top, right, bottom = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
示例#3
0
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        width, height = get_new_img_size(old_width, old_height)

        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float64)

        # 图片预处理,归一化
        photo = preprocess_input(np.expand_dims(photo, 0))
        preds = self.model_rpn.predict(photo)
        # 将预测结果进行解码
        anchors = get_anchors(self.get_img_output_length(width, height), width,
                              height)

        rpn_results = self.bbox_util.detection_out(preds,
                                                   anchors,
                                                   1,
                                                   confidence_threshold=0)
        R = rpn_results[0][:, 2:]

        R[:, 0] = np.array(np.round(R[:, 0] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 1] = np.array(np.round(R[:, 1] * height / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 2] = np.array(np.round(R[:, 2] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 3] = np.array(np.round(R[:, 3] * height / self.config.rpn_stride),
                           dtype=np.int32)

        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]
        base_layer = preds[2]

        delete_line = []
        for i, r in enumerate(R):
            if r[2] < 1 or r[3] < 1:
                delete_line.append(i)
        R = np.delete(R, delete_line, axis=0)

        bboxes = []
        probs = []
        labels = []
        for jk in range(R.shape[0] // self.config.num_rois + 1):
            ROIs = np.expand_dims(R[self.config.num_rois *
                                    jk:self.config.num_rois * (jk + 1), :],
                                  axis=0)

            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // self.config.num_rois:
                #pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], self.config.num_rois,
                                curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier.predict([base_layer, ROIs])

            for ii in range(P_cls.shape[1]):
                if np.max(P_cls[0, ii, :-1]) < self.confidence:
                    continue

                label = np.argmax(P_cls[0, ii, :-1])

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :-1])

                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= self.config.classifier_regr_std[0]
                ty /= self.config.classifier_regr_std[1]
                tw /= self.config.classifier_regr_std[2]
                th /= self.config.classifier_regr_std[3]

                cx = x + w / 2.
                cy = y + h / 2.
                cx1 = tx * w + cx
                cy1 = ty * h + cy
                w1 = math.exp(tw) * w
                h1 = math.exp(th) * h

                x1 = cx1 - w1 / 2.
                y1 = cy1 - h1 / 2.

                x2 = cx1 + w1 / 2
                y2 = cy1 + h1 / 2

                x1 = int(round(x1))
                y1 = int(round(y1))
                x2 = int(round(x2))
                y2 = int(round(y2))

                bboxes.append([x1, y1, x2, y2])
                probs.append(np.max(P_cls[0, ii, :-1]))
                labels.append(label)

        if len(bboxes) == 0:
            return old_image

        # 筛选出其中得分高于confidence的框
        labels = np.array(labels)
        probs = np.array(probs)
        boxes = np.array(bboxes, dtype=np.float32)
        boxes[:, 0] = boxes[:, 0] * self.config.rpn_stride / width
        boxes[:, 1] = boxes[:, 1] * self.config.rpn_stride / height
        boxes[:, 2] = boxes[:, 2] * self.config.rpn_stride / width
        boxes[:, 3] = boxes[:, 3] * self.config.rpn_stride / height
        results = np.array(
            self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                       np.array(boxes), self.num_classes - 1,
                                       0.4))

        top_label_indices = results[:, 0]
        top_conf = results[:, 1]
        boxes = results[:, 2:]
        boxes[:, 0] = boxes[:, 0] * old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * old_width
        boxes[:, 3] = boxes[:, 3] * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2
        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
    def detect_image(self, image_id, image, savepath):
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float32) / 255
        photo = np.transpose(photo, (2, 0, 1))
        with torch.no_grad():
            images = []
            images.append(photo)
            images = np.asarray(images)
            images = torch.from_numpy(images).cuda()

            roi_cls_locs, roi_scores, rois, roi_indices, feature = self.model(
                images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs,
                                        roi_scores,
                                        rois,
                                        feature,
                                        height=height,
                                        width=width,
                                        score_thresh=self.confidence)
            if len(outputs) == 0:
                return old_image
            if np.size(outputs, 0) > 4:
                outputs = outputs[np.argsort(outputs[:, 4])]
                outputs = outputs[-4:, :]
            bbox = outputs[:, :4]
            conf = outputs[:, 4]
            label = outputs[:, 5]
            f = outputs[:, 6:]
            f_size = np.size(f, 0)
            if f_size < 4:
                k = 4 - f_size
                cc = 0
                for lab in label:
                    if (lab == 1) or (lab == 3) or (lab == 5):
                        conbin_f = f[cc, :]
                        conbin_f = conbin_f.reshape(1, 2048)
                        for num in range(0, k):
                            f = np.append(f, conbin_f, axis=0)
                        break
                    cc = cc + 1
            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
            bbox = np.array(bbox, np.int32)
            f = np.array(f, np.float32).reshape((4, 2048))
            print(np.size(f))
            # image = old_image
            # thickness = (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2
            # font = ImageFont.truetype(font='model_data/simhei.ttf',
            #                           size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
            #
            # for i, c in enumerate(label):
            #     predicted_class = self.class_names[int(c)]
            #     score = conf[i]
            #
            #     left, top, right, bottom = bbox[i]
            #     top = top - 5
            #     left = left - 5
            #     bottom = bottom + 5
            #     right = right + 5
            #
            #     top = max(0, np.floor(top + 0.5).astype('int32'))
            #     left = max(0, np.floor(left + 0.5).astype('int32'))
            #     bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            #     right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))
            #
            #     # 画框框
            #     label = '{} {:.2f}'.format(predicted_class, score)
            #     draw = ImageDraw.Draw(image)
            #     label_size = draw.textsize(label, font)
            #     label = label.encode('utf-8')
            #     print(label)
            #
            #     if top - label_size[1] >= 0:
            #         text_origin = np.array([left, top - label_size[1]])
            #     else:
            #         text_origin = np.array([left, top + 1])
            #
            #     for i in range(thickness):
            #         draw.rectangle(
            #             [left + i, top + i, right - i, bottom - i],
            #             outline=self.colors[int(c)])
            #     draw.rectangle(
            #         [tuple(text_origin), tuple(text_origin + label_size)],
            #         fill=self.colors[int(c)])
            #     draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
            #     del draw
            #
            # image.save(savepath)
        if np.size(f, 0) != 4:
            print(
                "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
            )
        return {
            'image_id': image_id,
            'image_h': height,
            'image_w': width,
            'num_boxes': np.size(bbox, 0),
            'boxes': base64.b64encode(bbox),
            'features': base64.b64encode(f)
        }
示例#5
0
文件: frcnn.py 项目: innvoker/MIL
    def detect_image(self, image):
        with torch.no_grad():
            start_time = time.time()
            image_shape = np.array(np.shape(image)[0:2])
            old_width = image_shape[1]
            old_height = image_shape[0]
            old_image = copy.deepcopy(image)
            width, height = get_new_img_size(old_width, old_height)

            image = image.resize([width, height], Image.BICUBIC)
            photo = np.array(image, dtype=np.float32) / 255
            photo = np.transpose(photo, (2, 0, 1))

            images = []
            images.append(photo)
            images = np.asarray(images)
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()

    #非凸性优化,边界框矫正
            roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs,
                                        roi_scores,
                                        rois,
                                        height=height,
                                        width=width,
                                        nms_iou=self.iou,
                                        score_thresh=self.confidence)
            if len(outputs) == 0:
                return old_image
            bbox = outputs[:, :4]
            conf = outputs[:, 4]
            label = outputs[:, 5]

            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
            bbox = np.array(bbox, np.int32)

        image = old_image
        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2
        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        for i, c in enumerate(label):
            predicted_class = self.class_names[int(c)]
            score = conf[i]

            left, top, right, bottom = bbox[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            #图片输出
            label = '{}'.format(predicted_class)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        print("time:", time.time() - start_time)
        return image
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   Reset the original image to the size of 600 short edges
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)
        photo = np.array(image, dtype=np.float64)

        #-----------------------------------------------------------#
        #   Image preprocessing and normalization.
        #-----------------------------------------------------------#
        photo = preprocess_input(np.expand_dims(photo, 0))
        rpn_pred = self.model_rpn.predict(photo)

        #-----------------------------------------------------------#
        #   The prediction result of the suggestion box network is decoded
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(
            width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width,
                              height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   After obtaining the suggestion box and the shared feature layer, they are passed into the classifier for prediction
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict(
            [base_layer, temp_ROIs])

        #-------------------------------------------------------------#
        #   The prediction frame is obtained by decoding the suggestion box by using the prediction results of classifier
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0]) == 0:
            return old_image

        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
            1)

        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
示例#7
0
    def get_FPS(self, image, test_interval):
        #-------------------------------------#
        #   转换成RGB图片,可以用于灰度图预测。
        #-------------------------------------#
        image = image.convert("RGB")
        
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
    
        #---------------------------------------------------------#
        #   给原图像进行resize,resize到短边为600的大小上
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width,height], Image.BICUBIC)
        photo = np.array(image,dtype = np.float64)

        #-----------------------------------------------------------#
        #   图片预处理,归一化。
        #-----------------------------------------------------------#
        photo = preprocess_input(np.expand_dims(photo,0))
        rpn_pred = self.model_rpn.predict(photo)

        #-----------------------------------------------------------#
        #   将建议框网络的预测结果进行解码
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width, height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)
        
        #-------------------------------------------------------------#
        #   在获得建议框和共享特征层后,将二者传入classifier中进行预测
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict([base_layer, temp_ROIs])
        
        #-------------------------------------------------------------#
        #   利用classifier的预测结果对建议框进行解码,获得预测框
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0])>0:
            results = np.array(results[0])
            boxes = results[:, :4]
            top_conf = results[:, 4]
            top_label_indices = results[:, 5]
            boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
            boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        t1 = time.time()
        for _ in range(test_interval):
            rpn_pred = self.model_rpn.predict(photo)

            #-----------------------------------------------------------#
            #   将建议框网络的预测结果进行解码
            #-----------------------------------------------------------#
            base_feature_width, base_feature_height = self.get_img_output_length(width, height)
            anchors = get_anchors([base_feature_width, base_feature_height], width, height)
            rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)
            
            #-------------------------------------------------------------#
            #   在获得建议框和共享特征层后,将二者传入classifier中进行预测
            #-------------------------------------------------------------#
            base_layer = rpn_pred[2]
            proposal_box = np.array(rpn_results)[:, :, 1:]
            temp_ROIs = np.zeros_like(proposal_box)
            temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
            classifier_pred = self.model_classifier.predict([base_layer, temp_ROIs])
            
            #-------------------------------------------------------------#
            #   利用classifier的预测结果对建议框进行解码,获得预测框
            #-------------------------------------------------------------#
            results = self.bbox_util.detection_out_classifier(classifier_pred, proposal_box, self.config, self.confidence)

            if len(results[0])>0:
                results = np.array(results[0])
                boxes = results[:, :4]
                top_conf = results[:, 4]
                top_label_indices = results[:, 5]
                boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
                boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
    def detect_image(self, image_id, image):
        self.confidence = 0.05
        f = open("./input/detection-results/" + image_id + ".txt", "w")

        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        width, height = get_new_img_size(old_width, old_height)

        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float64)

        # 图片预处理,归一化
        photo = preprocess_input(np.expand_dims(photo, 0))
        preds = self.model_rpn.predict(photo)
        # 将预测结果进行解码
        anchors = get_anchors(self.get_img_output_length(width, height), width,
                              height)

        rpn_results = self.bbox_util.detection_out(preds,
                                                   anchors,
                                                   1,
                                                   confidence_threshold=0)
        R = rpn_results[0][:, 2:]

        R[:, 0] = np.array(np.round(R[:, 0] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 1] = np.array(np.round(R[:, 1] * height / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 2] = np.array(np.round(R[:, 2] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 3] = np.array(np.round(R[:, 3] * height / self.config.rpn_stride),
                           dtype=np.int32)

        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]
        base_layer = preds[2]

        delete_line = []
        for i, r in enumerate(R):
            if r[2] < 1 or r[3] < 1:
                delete_line.append(i)
        R = np.delete(R, delete_line, axis=0)

        bboxes = []
        probs = []
        labels = []
        for jk in range(R.shape[0] // self.config.num_rois + 1):
            ROIs = np.expand_dims(R[self.config.num_rois *
                                    jk:self.config.num_rois * (jk + 1), :],
                                  axis=0)

            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // self.config.num_rois:
                #pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], self.config.num_rois,
                                curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier.predict([base_layer, ROIs])

            for ii in range(P_cls.shape[1]):
                if np.max(P_cls[0, ii, :]) < self.confidence or np.argmax(
                        P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                label = np.argmax(P_cls[0, ii, :])

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])

                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= self.config.classifier_regr_std[0]
                ty /= self.config.classifier_regr_std[1]
                tw /= self.config.classifier_regr_std[2]
                th /= self.config.classifier_regr_std[3]

                cx = x + w / 2.
                cy = y + h / 2.
                cx1 = tx * w + cx
                cy1 = ty * h + cy
                w1 = math.exp(tw) * w
                h1 = math.exp(th) * h

                x1 = cx1 - w1 / 2.
                y1 = cy1 - h1 / 2.

                x2 = cx1 + w1 / 2
                y2 = cy1 + h1 / 2

                x1 = int(round(x1))
                y1 = int(round(y1))
                x2 = int(round(x2))
                y2 = int(round(y2))

                bboxes.append([x1, y1, x2, y2])
                probs.append(np.max(P_cls[0, ii, :]))
                labels.append(label)

        if len(bboxes) == 0:
            return old_image

        # 筛选出其中得分高于confidence的框
        labels = np.array(labels)
        probs = np.array(probs)
        boxes = np.array(bboxes, dtype=np.float32)
        boxes[:, 0] = boxes[:, 0] * self.config.rpn_stride / width
        boxes[:, 1] = boxes[:, 1] * self.config.rpn_stride / height
        boxes[:, 2] = boxes[:, 2] * self.config.rpn_stride / width
        boxes[:, 3] = boxes[:, 3] * self.config.rpn_stride / height
        results = np.array(
            self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                       np.array(boxes), self.num_classes - 1,
                                       0.4))

        top_label_indices = results[:, 0]
        top_conf = results[:, 1]
        boxes = results[:, 2:]
        boxes[:, 0] = boxes[:, 0] * old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * old_width
        boxes[:, 3] = boxes[:, 3] * old_height

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = str(top_conf[i])

            left, top, right, bottom = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
示例#9
0
    def detect_image(self, image):
        #-------------------------------------#
        #   转换成RGB图片,可以用于灰度图预测。
        #-------------------------------------#
        image = image.convert("RGB")

        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   给原图像进行resize,resize到短边为600的大小上
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)
        photo = np.array(image, dtype=np.float64)

        #-----------------------------------------------------------#
        #   图片预处理,归一化。
        #-----------------------------------------------------------#
        photo = preprocess_input(np.expand_dims(photo, 0))
        rpn_pred = self.model_rpn_get_pred(photo)
        rpn_pred = [x.numpy() for x in rpn_pred]

        #-----------------------------------------------------------#
        #   将建议框网络的预测结果进行解码
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(
            width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width,
                              height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   在获得建议框和共享特征层后,将二者传入classifier中进行预测
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier_get_pred(
            [base_layer, temp_ROIs])
        classifier_pred = [x.numpy() for x in classifier_pred]

        #-------------------------------------------------------------#
        #   利用classifier的预测结果对建议框进行解码,获得预测框
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0]) == 0:
            return old_image

        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
            1)

        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
示例#10
0
    def detect_image(self, image):
        # 1 处理图片
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        # 1.1 图片resize,最短边600
        width,height = get_new_img_size(old_width,old_height)

        image = image.resize([width,height])
        photo = np.array(image,dtype = np.float64)

        # 2 图片预处理,归一化,导入rpn网络进行预测得到置信度和预测偏移值
        photo = preprocess_input(np.expand_dims(photo,0))
        preds = self.model_rpn.predict(photo)
        # 3 生成先验框
        anchors = get_anchors(self.get_img_output_length(width,height),width,height)
        # 4 将预测结果进行解码--nms筛选得到建议框 **********************
        rpn_results = self.bbox_util.detection_out(preds,anchors,1,confidence_threshold=0.8)
        # temp是解码后的建议框
        # temp = copy.deepcopy(rpn_results[0][:, 2:])
        R = rpn_results[0][:, 2:]  # r.num=300
        # 5 下面处理后的框是放在featureMap上,数值大小在0,38之间 左上角右下角
        R[:,0] = np.array(np.round(R[:, 0]*width/self.config.rpn_stride),dtype=np.int32)
        R[:,1] = np.array(np.round(R[:, 1]*height/self.config.rpn_stride),dtype=np.int32)
        R[:,2] = np.array(np.round(R[:, 2]*width/self.config.rpn_stride),dtype=np.int32)
        R[:,3] = np.array(np.round(R[:, 3]*height/self.config.rpn_stride),dtype=np.int32)
        
        R[:, 2] -= R[:, 0] # 得到建议框的左上角xy
        R[:, 3] -= R[:, 1] # 得到建议框的宽高wh
        base_layer = preds[2]
        
        delete_line = []
        for i,r in enumerate(R):
            if r[2] < 1 or r[3] < 1:
                delete_line.append(i)
        R = np.delete(R,delete_line,axis=0)
        
        bboxes = []
        probs = []
        labels = []
        # 6 分批次把建议框传入classifier
        for jk in range(R.shape[0]//self.config.num_rois + 1):   #  300//32+1
            ROIs = np.expand_dims(R[self.config.num_rois*jk:self.config.num_rois*(jk+1), :], axis=0)
            # ROIs=32
            if ROIs.shape[1] == 0:
                break
            # 对最后一次建议框填充到32个
            if jk == R.shape[0]//self.config.num_rois:
                # pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0],self.config.num_rois,curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded
            # P_cls种类,置信度, P_regr
            [P_cls, P_regr] = self.model_classifier.predict([base_layer,ROIs]) # ROIs = 32

            for ii in range(P_cls.shape[1]):
                # 6.1 判断置信度是否大于阈值
                if np.max(P_cls[0, ii, :]) < self.confidence or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue
                # 建议框类别
                label = np.argmax(P_cls[0, ii, :])
                # 建议框坐标
                (x, y, w, h) = ROIs[0, ii, :]
                #
                cls_num = np.argmax(P_cls[0, ii, :])

                (tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
                # 建议框的调整参数
                tx /= self.config.classifier_regr_std[0] # 8.0
                ty /= self.config.classifier_regr_std[1]
                tw /= self.config.classifier_regr_std[2] # 4.0
                th /= self.config.classifier_regr_std[3]
                # 6.2 解码 - 中心
                cx = x + w/2.
                cy = y + h/2.
                # 调整后的中心
                cx1 = tx * w + cx
                cy1 = ty * h + cy
                w1 = math.exp(tw) * w
                h1 = math.exp(th) * h
                # 左上角
                x1 = cx1 - w1/2.
                y1 = cy1 - h1/2.

                x2 = cx1 + w1/2
                y2 = cy1 + h1/2
                # 右上角
                x1 = int(round(x1))
                y1 = int(round(y1))
                x2 = int(round(x2))
                y2 = int(round(y2))

                bboxes.append([x1,y1,x2,y2])
                probs.append(np.max(P_cls[0, ii, :]))
                labels.append(label)

        if len(bboxes)==0:
            return old_image
        
        # 6.3 筛选出其中得分高于confidence的框
        labels = np.array(labels)
        probs = np.array(probs)
        boxes = np.array(bboxes,dtype=np.float32)
        # 把box转化为小数
        boxes[:,0] = boxes[:,0]*self.config.rpn_stride/width
        boxes[:,1] = boxes[:,1]*self.config.rpn_stride/height
        boxes[:,2] = boxes[:,2]*self.config.rpn_stride/width
        boxes[:,3] = boxes[:,3]*self.config.rpn_stride/height
        # 6.4 NMS
        results = np.array(self.bbox_util.nms_for_out(np.array(labels),np.array(probs),np.array(boxes),self.num_classes-1,0.4))
        
        top_label_indices = results[:,0]
        top_conf = results[:,1]
        boxes = results[:,2:]
        # 7 映射到原图
        boxes[:,0] = boxes[:,0]*old_width
        boxes[:,1] = boxes[:,1]*old_height
        boxes[:,2] = boxes[:,2]*old_width
        boxes[:,3] = boxes[:,3]*old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
        
        thickness = (np.shape(old_image)[0] + np.shape(old_image)[1]) // width
        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

            # 画框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)
            
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle(
                    [left + i, top + i, right - i, bottom - i],
                    outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
            del draw
        return image