예제 #1
0
    def detect_image(self, image):
        image = [np.array(image)]
        molded_images, image_metas, windows = mold_inputs(self.config, image)

        image_shape = molded_images[0].shape
        anchors = get_anchors(self.config, image_shape)
        anchors = np.broadcast_to(anchors, (1, ) + anchors.shape)

        detections, _, _, mrcnn_mask, _, _, _ =\
            self.model.predict([molded_images, image_metas, anchors], verbose=0)

        final_rois, final_class_ids, final_scores, final_masks =\
            unmold_detections(detections[0], mrcnn_mask[0],
                                    image[0].shape, molded_images[0].shape,
                                    windows[0])

        r = {
            "rois": final_rois,
            "class_ids": final_class_ids,
            "scores": final_scores,
            "masks": final_masks,
        }

        visualize.display_instances(image[0], r['rois'], r['masks'],
                                    r['class_ids'], self.class_names,
                                    r['scores'])
예제 #2
0
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        for name, value in kwargs.items():
            setattr(self, name, value)
        #---------------------------------------------------#
        #   计算总的类的数量
        #---------------------------------------------------#
        self.class_names, self.num_classes = get_classes(self.classes_path)
        self.anchors = torch.from_numpy(
            get_anchors(self.input_shape, self.anchors_size,
                        self.backbone)).type(torch.FloatTensor)
        if self.cuda:
            self.anchors = self.anchors.cuda()
        self.num_classes = self.num_classes + 1

        #---------------------------------------------------#
        #   画框设置不同的颜色
        #---------------------------------------------------#
        hsv_tuples = [(x / self.num_classes, 1., 1.)
                      for x in range(self.num_classes)]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

        self.bbox_util = BBoxUtility(self.num_classes)
        self.generate()

        show_config(**self._defaults)
예제 #3
0
    def forward(self, h, img_size, scale=1.):
        """

        Forward Region Proposal Network.

        """
        n_pre_nms = 12000
        n_post_nms = 2000
        nms_thresh = 0.7

        # get anchors predifined
        n, _, hh, ww = h.shape
        anchors = get_anchors(self.anchor_base, self.feat_stride, hh, ww)

        # main forward
        hidd = F.relu(self.conv1(h))
        rpn_locs = self.loc(hidd)
        rpn_scores = self.score(hidd)

        # rpn_locs, rpn_scores
        rpn_locs = rpn_locs.permute(0, 2, 3, 1).contiguous().view(n, -1, 4)
        rpn_scores = rpn_scores.permute(0, 2, 3, 1).contiguous().view(n, -1, 2)
        scores = rpn_scores[:, :, 1].data.cpu().numpy()[0]

        # get rois, roi_indices
        rois = get_rois_from_loc_anchors(anchors,
                                         rpn_locs[0].data.cpu().numpy())

        # clip
        rois[:, ::2] = np.clip(rois[:, ::2], 0, img_size[0])
        rois[:, 1::2] = np.clip(rois[:, 1::2], 0, img_size[1])

        # remove < min_size
        min_size = 16
        min_size = min_size * scale
        hs = rois[:, 2] - rois[:, 0]
        ws = rois[:, 3] - rois[:, 1]

        if t.is_tensor(min_size):
            min_size = min_size.numpy()

        keep = np.where((hs >= min_size) & (ws >= min_size))[0]
        rois = rois[keep, :]
        scores = scores[keep]

        # Sort all (proposal, score) pairs by score from highest to lowest.
        # Take top pre_nms_topN (e.g. 6000).
        order = scores.ravel().argsort()[::-1]
        if n_pre_nms > 0:
            order = order[:n_pre_nms]
        rois = rois[order, :]

        # NMS
        keep = py_cpu_nms(rois, nms_thresh)
        keep = keep[:n_post_nms]
        rois = rois[keep]
        return rpn_locs, rpn_scores, rois, [0] * len(rois), anchors
예제 #4
0
    def generate(self):
        while True:
            shuffle(self.train_lines)
            lines = self.train_lines
            for annotation_line in lines:  
                img,y=self.get_random_data(annotation_line)
                height, width, _ = np.shape(img)
                
                if len(y)==0:
                    continue
                boxes = np.array(y[:,:4],dtype=np.float32)
                boxes[:,0] = boxes[:,0]/width
                boxes[:,1] = boxes[:,1]/height
                boxes[:,2] = boxes[:,2]/width
                boxes[:,3] = boxes[:,3]/height
                
                box_heights = boxes[:,3] - boxes[:,1]
                box_widths = boxes[:,2] - boxes[:,0]
                if (box_heights<=0).any() or (box_widths<=0).any():
                    continue

                y[:,:4] = boxes[:,:4]

                anchors = get_anchors(get_img_output_length(width,height),width,height)
                
                # 计算真实框对应的先验框,与这个先验框应当有的预测结果
                assignment = self.bbox_util.assign_boxes(y,anchors)

                num_regions = 256
                
                classification = assignment[:,4]
                regression = assignment[:,:]
                
                mask_pos = classification[:]>0
                num_pos = len(classification[mask_pos])
                if num_pos > num_regions/2:
                    val_locs = random.sample(range(num_pos), int(num_pos - num_regions/2))
                    classification[mask_pos][val_locs] = -1
                    regression[mask_pos][val_locs,-1] = -1
                
                mask_neg = classification[:]==0
                num_neg = len(classification[mask_neg])
                if len(classification[mask_neg]) + num_pos > num_regions:
                    val_locs = random.sample(range(num_neg), int(num_neg - num_pos))
                    classification[mask_neg][val_locs] = -1
                    
                classification = np.reshape(classification,[-1,1])
                regression = np.reshape(regression,[-1,5])

                tmp_inp = np.array(img)
                tmp_targets = [np.expand_dims(np.array(classification,dtype=np.float32),0),np.expand_dims(np.array(regression,dtype=np.float32),0)]

                yield preprocess_input(np.expand_dims(tmp_inp,0)), tmp_targets, np.expand_dims(y,0)
예제 #5
0
    def get_FPS(self, image, test_interval):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   在这里将图像转换成RGB图像,防止灰度图在预测时报错。
        #   代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB
        #---------------------------------------------------------#
        image = cvtColor(image)
        #---------------------------------------------------------#
        #   给图像增加灰条,实现不失真的resize
        #   也可以直接resize进行识别
        #---------------------------------------------------------#
        image_data, image_metas, windows = resize_image([np.array(image)],
                                                        self.config)
        #---------------------------------------------------------#
        #   根据当前输入图像的大小,生成先验框
        #---------------------------------------------------------#
        anchors = np.expand_dims(get_anchors(self.config, image_data[0].shape),
                                 0)
        #---------------------------------------------------------#
        #   将图像输入网络当中进行预测!
        #---------------------------------------------------------#
        detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
            [image_data, image_metas, anchors], verbose=0)

        #---------------------------------------------------#
        #   上面获得的预测结果是相对于padding后的图片的
        #   我们需要将预测结果转换到原图上
        #---------------------------------------------------#
        box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
            detections[0], mrcnn_mask[0], image_shape, image_data[0].shape,
            windows[0])

        t1 = time.time()
        for _ in range(test_interval):
            #---------------------------------------------------------#
            #   将图像输入网络当中进行预测!
            #---------------------------------------------------------#
            detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
                [image_data, image_metas, anchors], verbose=0)

            #---------------------------------------------------#
            #   上面获得的预测结果是相对于padding后的图片的
            #   我们需要将预测结果转换到原图上
            #---------------------------------------------------#
            box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
                detections[0], mrcnn_mask[0], image_shape, image_data[0].shape,
                windows[0])
        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
예제 #6
0
    def get_map_out(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   在这里将图像转换成RGB图像,防止灰度图在预测时报错。
        #   代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB
        #---------------------------------------------------------#
        image = cvtColor(image)
        #---------------------------------------------------------#
        #   给图像增加灰条,实现不失真的resize
        #   也可以直接resize进行识别
        #---------------------------------------------------------#
        image_data, image_metas, windows = resize_image([np.array(image)],
                                                        self.config)
        #---------------------------------------------------------#
        #   根据当前输入图像的大小,生成先验框
        #---------------------------------------------------------#
        anchors = np.expand_dims(get_anchors(self.config, image_data[0].shape),
                                 0)
        #---------------------------------------------------------#
        #   将图像输入网络当中进行预测!
        #---------------------------------------------------------#
        detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
            [image_data, image_metas, anchors], verbose=0)

        #---------------------------------------------------#
        #   上面获得的预测结果是相对于padding后的图片的
        #   我们需要将预测结果转换到原图上
        #---------------------------------------------------#
        box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
            detections[0], mrcnn_mask[0], image_shape, image_data[0].shape,
            windows[0])

        outboxes = None
        if box_thre is not None:
            outboxes = np.zeros_like(box_thre)
            outboxes[:, [0, 2]] = box_thre[:, [1, 3]]
            outboxes[:, [1, 3]] = box_thre[:, [0, 2]]
        return outboxes, class_thre, class_ids, masks_arg, masks_sigmoid
예제 #7
0
                print(
                    'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                    .format(mean_overlapping_bboxes, EPOCH_LENGTH))
                if mean_overlapping_bboxes == 0:
                    print(
                        'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                    )

            X, Y, boxes = batch[0], batch[1], batch[2]

            loss_rpn = model_rpn.train_on_batch(X, Y)
            write_log(callback, ['rpn_cls_loss', 'rpn_reg_loss'], loss_rpn,
                      train_step)
            P_rpn = model_rpn.predict_on_batch(X)
            height, width, _ = np.shape(X[0])
            anchors = get_anchors(get_img_output_length(width, height), width,
                                  height)

            # 将预测结果进行解码
            results = bbox_util.detection_out(P_rpn,
                                              anchors,
                                              1,
                                              confidence_threshold=0)

            R = results[0][:, 2:]

            X2, Y1, Y2, IouS = calc_iou(R, config, boxes[0], width, height,
                                        NUM_CLASSES)

            if X2 is None:
                rpn_accuracy_rpn_monitor.append(0)
                rpn_accuracy_for_epoch.append(0)
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   Reset the original image to the size of 600 short edges
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)
        photo = np.array(image, dtype=np.float64)

        #-----------------------------------------------------------#
        #   Image preprocessing and normalization.
        #-----------------------------------------------------------#
        photo = preprocess_input(np.expand_dims(photo, 0))
        rpn_pred = self.model_rpn.predict(photo)

        #-----------------------------------------------------------#
        #   The prediction result of the suggestion box network is decoded
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(
            width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width,
                              height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   After obtaining the suggestion box and the shared feature layer, they are passed into the classifier for prediction
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict(
            [base_layer, temp_ROIs])

        #-------------------------------------------------------------#
        #   The prediction frame is obtained by decoding the suggestion box by using the prediction results of classifier
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0]) == 0:
            return old_image

        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
            1)

        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
예제 #9
0
from keras.optimizers import Adam
from nets.M2det_training import Generator
from nets.M2det_training import conf_loss, smooth_l1
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from utils.utils import BBoxUtility
from utils.anchors import get_anchors

if __name__ == "__main__":
    NUM_CLASSES = 21
    input_shape = (320, 320, 3)
    annotation_path = '2007_train.txt'

    inputs = keras.layers.Input(shape=input_shape)
    model = M2det.m2det(NUM_CLASSES, inputs)

    priors = get_anchors((input_shape[0], input_shape[1]))
    bbox_util = BBoxUtility(NUM_CLASSES, priors)

    model.load_weights("model_data\M2det_weights.h5",
                       by_name=True,
                       skip_mismatch=True)
    model.summary()
    # 0.1用于验证,0.9用于训练
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val
예제 #10
0
    def generate(self):
        while True:
            shuffle(self.train_lines)
            lines = self.train_lines # [拉曼光谱数据 x1,1,x2,1,0 拉曼光谱数据....]
            for annotation_line in lines:  
                raman,y=self.get_random_data(annotation_line)
                width, height, _ = np.shape(raman) # width:1044 height:1   _=1
                
                if len(y)==0:
                    continue
                boxes = np.array(y[:,:4],dtype=np.float32)
                boxes[:,0] = boxes[:,0]/width
                boxes[:,1] = boxes[:,1]/height
                boxes[:,2] = boxes[:,2]/width
                boxes[:,3] = boxes[:,3]/height
                
                # box_heights = boxes[:,3] - boxes[:,1]
                # box_widths = boxes[:,2] - boxes[:,0]
                # if (box_heights<=0).any() or (box_widths<=0).any():
                #     continue

                y[:,:4] = boxes[:,:4]

                anchors = get_anchors((66,1),1044,1)
                
                # 计算真实框对应的先验框,与这个先验框应当有的预测结果
                assignment = self.bbox_util.assign_boxes(y,anchors)

                num_regions = 16
                
                classification = assignment[:,4]
                regression = assignment[:,:]
                
                mask_pos = classification[:]>0
                num_pos = len(classification[mask_pos])
                if num_pos > num_regions/2:
                    val_locs = random.sample(range(num_pos), int(num_pos - num_regions/2))
                    temp_classification = classification[mask_pos]
                    temp_regression = regression[mask_pos]
                    temp_classification[val_locs] = -1
                    temp_regression[val_locs,-1] = -1
                    classification[mask_pos] = temp_classification
                    regression[mask_pos] = temp_regression
                    
                mask_neg = classification[:]==0
                num_neg = len(classification[mask_neg])
                mask_pos = classification[:]>0
                num_pos = len(classification[mask_pos])
                if len(classification[mask_neg]) + num_pos > num_regions:
                    val_locs = random.sample(range(num_neg), int(num_neg + num_pos - num_regions))
                    temp_classification = classification[mask_neg]
                    temp_classification[val_locs] = -1
                    classification[mask_neg] = temp_classification
                    
                classification = np.reshape(classification,[-1,1])
                regression = np.reshape(regression,[-1,5])

                tmp_inp = np.array(raman)

                tmp_inp = (tmp_inp - (np.min(tmp_inp)))/(np.max(tmp_inp) - np.min(tmp_inp))
                tmp_targets = [np.expand_dims(np.array(classification,dtype=np.float32),0),np.expand_dims(np.array(regression,dtype=np.float32),0)]

                yield np.expand_dims(tmp_inp,0), tmp_targets, np.expand_dims(y,0)
예제 #11
0
    train_annotation_path = '2007_train.txt'
    val_annotation_path = '2007_val.txt'

    #------------------------------------------------------#
    #   设置用到的显卡
    #------------------------------------------------------#
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in train_gpu)
    ngpus_per_node = len(train_gpu)
    print('Number of devices: {}'.format(ngpus_per_node))

    #----------------------------------------------------#
    #   获取classes和anchor
    #----------------------------------------------------#
    class_names, num_classes = get_classes(classes_path)
    num_classes += 1
    anchors = get_anchors(input_shape, anchors_size)

    K.clear_session()
    model_body = m2det((input_shape[0], input_shape[1], 3), num_classes)
    if model_path != '':
        #------------------------------------------------------#
        #   载入预训练权重
        #------------------------------------------------------#
        print('Load weights {}.'.format(model_path))
        model_body.load_weights(model_path, by_name=True, skip_mismatch=True)

    if ngpus_per_node > 1:
        model = multi_gpu_model(model_body, gpus=ngpus_per_node)
    else:
        model = model_body
예제 #12
0
import numpy as np
import keras
from keras.optimizers import Adam
from nets.retinanet_training import Generator
from nets.retinanet_training import focal, smooth_l1
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from utils.utils import BBoxUtility
from utils.anchors import get_anchors

if __name__ == "__main__":
    NUM_CLASSES = 20
    input_shape = (600, 600, 3)
    annotation_path = '2007_train.txt'
    inputs = keras.layers.Input(shape=input_shape)
    model = retinanet.resnet_retinanet(NUM_CLASSES, inputs)
    priors = get_anchors(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)

    #-------------------------------------------#
    #   权值文件的下载请看README
    #-------------------------------------------#
    model.load_weights("model_data/resnet50_coco_best_v2.1.0.h5",
                       by_name=True,
                       skip_mismatch=True)

    # 0.1用于验证,0.9用于训练
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
예제 #13
0
    def get_map_txt(self, image_id, image, class_names, map_out_path):
        f = open(
            os.path.join(map_out_path,
                         "detection-results/" + image_id + ".txt"), "w")
        #---------------------------------------------------#
        #   计算输入图片的高和宽
        #---------------------------------------------------#
        image_shape = np.array(np.shape(image)[0:2])
        input_shape = get_new_img_size(image_shape[0], image_shape[1])
        #---------------------------------------------------------#
        #   在这里将图像转换成RGB图像,防止灰度图在预测时报错。
        #   代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB
        #---------------------------------------------------------#
        image = cvtColor(image)
        #---------------------------------------------------------#
        #   给原图像进行resize,resize到短边为600的大小上
        #---------------------------------------------------------#
        image_data = resize_image(image, [input_shape[1], input_shape[0]])
        #---------------------------------------------------------#
        #   添加上batch_size维度
        #---------------------------------------------------------#
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, dtype='float32')), 0)

        #---------------------------------------------------------#
        #   获得rpn网络预测结果和base_layer
        #---------------------------------------------------------#
        rpn_pred = self.model_rpn(image_data)
        rpn_pred = [x.numpy() for x in rpn_pred]
        #---------------------------------------------------------#
        #   生成先验框并解码
        #---------------------------------------------------------#
        anchors = get_anchors(input_shape, self.backbone, self.anchors_size)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   利用建议框获得classifier网络预测结果
        #-------------------------------------------------------------#
        classifier_pred = self.model_classifier(
            [rpn_pred[2], rpn_results[:, :, [1, 0, 3, 2]]])
        classifier_pred = [x.numpy() for x in classifier_pred]
        #-------------------------------------------------------------#
        #   利用classifier的预测结果对建议框进行解码,获得预测框
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, rpn_results, image_shape, input_shape,
            self.confidence)

        #--------------------------------------#
        #   如果没有检测到物体,则返回原图
        #--------------------------------------#
        if len(results[0]) <= 0:
            return

        top_label = np.array(results[0][:, 5], dtype='int32')
        top_conf = results[0][:, 4]
        top_boxes = results[0][:, :4]

        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box = top_boxes[i]
            score = str(top_conf[i])

            top, left, bottom, right = box

            if predicted_class not in class_names:
                continue

            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
예제 #14
0
파일: train.py 프로젝트: ttyhu/RFB-keras
import numpy as np
import pickle
import tensorflow as tf
import cv2
import keras
import os
import sys

 
if __name__ == "__main__":
    log_dir = "logs/"
    annotation_path = '2007_train.txt'
    
    NUM_CLASSES = 21
    input_shape = (300, 300, 3)
    priors = get_anchors()
    bbox_util = BBoxUtility(NUM_CLASSES, priors)

    # 0.1用于验证,0.9用于训练
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines)*val_split)
    num_train = len(lines) - num_val
    
    model = rfb300(input_shape, num_classes=NUM_CLASSES)
    model.load_weights("model_data/rfb_weights.h5", by_name=True, skip_mismatch=True)
    # 训练参数设置
예제 #15
0
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        width, height = get_new_img_size(old_width, old_height)

        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float64)

        # 图片预处理,归一化
        photo = preprocess_input(np.expand_dims(photo, 0))
        preds = self.model_rpn.predict(photo)
        # 将预测结果进行解码
        anchors = get_anchors(self.get_img_output_length(width, height), width,
                              height)

        rpn_results = self.bbox_util.detection_out(preds,
                                                   anchors,
                                                   1,
                                                   confidence_threshold=0)
        R = rpn_results[0][:, 2:]

        R[:, 0] = np.array(np.round(R[:, 0] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 1] = np.array(np.round(R[:, 1] * height / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 2] = np.array(np.round(R[:, 2] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 3] = np.array(np.round(R[:, 3] * height / self.config.rpn_stride),
                           dtype=np.int32)

        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]
        base_layer = preds[2]

        delete_line = []
        for i, r in enumerate(R):
            if r[2] < 1 or r[3] < 1:
                delete_line.append(i)
        R = np.delete(R, delete_line, axis=0)

        bboxes = []
        probs = []
        labels = []
        for jk in range(R.shape[0] // self.config.num_rois + 1):
            ROIs = np.expand_dims(R[self.config.num_rois *
                                    jk:self.config.num_rois * (jk + 1), :],
                                  axis=0)

            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // self.config.num_rois:
                #pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], self.config.num_rois,
                                curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier.predict([base_layer, ROIs])

            for ii in range(P_cls.shape[1]):
                if np.max(P_cls[0, ii, :-1]) < self.confidence:
                    continue

                label = np.argmax(P_cls[0, ii, :-1])

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :-1])

                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= self.config.classifier_regr_std[0]
                ty /= self.config.classifier_regr_std[1]
                tw /= self.config.classifier_regr_std[2]
                th /= self.config.classifier_regr_std[3]

                cx = x + w / 2.
                cy = y + h / 2.
                cx1 = tx * w + cx
                cy1 = ty * h + cy
                w1 = math.exp(tw) * w
                h1 = math.exp(th) * h

                x1 = cx1 - w1 / 2.
                y1 = cy1 - h1 / 2.

                x2 = cx1 + w1 / 2
                y2 = cy1 + h1 / 2

                x1 = int(round(x1))
                y1 = int(round(y1))
                x2 = int(round(x2))
                y2 = int(round(y2))

                bboxes.append([x1, y1, x2, y2])
                probs.append(np.max(P_cls[0, ii, :-1]))
                labels.append(label)

        if len(bboxes) == 0:
            return old_image

        # 筛选出其中得分高于confidence的框
        labels = np.array(labels)
        probs = np.array(probs)
        boxes = np.array(bboxes, dtype=np.float32)
        boxes[:, 0] = boxes[:, 0] * self.config.rpn_stride / width
        boxes[:, 1] = boxes[:, 1] * self.config.rpn_stride / height
        boxes[:, 2] = boxes[:, 2] * self.config.rpn_stride / width
        boxes[:, 3] = boxes[:, 3] * self.config.rpn_stride / height
        results = np.array(
            self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                       np.array(boxes), self.num_classes - 1,
                                       0.4))

        top_label_indices = results[:, 0]
        top_conf = results[:, 1]
        boxes = results[:, 2:]
        boxes[:, 0] = boxes[:, 0] * old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * old_width
        boxes[:, 3] = boxes[:, 3] * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2
        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
예제 #16
0
 def _get_prior(self):
     data = get_anchors(self.retinanet_model)
     return data
예제 #17
0
    def detect_image(self, image_id, image):
        self.confidence = 0.05
        f = open("./input/detection-results/" + image_id + ".txt", "w")

        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        width, height = get_new_img_size(old_width, old_height)

        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float64)

        # 图片预处理,归一化
        photo = preprocess_input(np.expand_dims(photo, 0))
        preds = self.model_rpn.predict(photo)
        # 将预测结果进行解码
        anchors = get_anchors(self.get_img_output_length(width, height), width,
                              height)

        rpn_results = self.bbox_util.detection_out(preds,
                                                   anchors,
                                                   1,
                                                   confidence_threshold=0)
        R = rpn_results[0][:, 2:]

        R[:, 0] = np.array(np.round(R[:, 0] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 1] = np.array(np.round(R[:, 1] * height / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 2] = np.array(np.round(R[:, 2] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 3] = np.array(np.round(R[:, 3] * height / self.config.rpn_stride),
                           dtype=np.int32)

        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]
        base_layer = preds[2]

        delete_line = []
        for i, r in enumerate(R):
            if r[2] < 1 or r[3] < 1:
                delete_line.append(i)
        R = np.delete(R, delete_line, axis=0)

        bboxes = []
        probs = []
        labels = []
        for jk in range(R.shape[0] // self.config.num_rois + 1):
            ROIs = np.expand_dims(R[self.config.num_rois *
                                    jk:self.config.num_rois * (jk + 1), :],
                                  axis=0)

            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // self.config.num_rois:
                #pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], self.config.num_rois,
                                curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier.predict([base_layer, ROIs])

            for ii in range(P_cls.shape[1]):
                if np.max(P_cls[0, ii, :]) < self.confidence or np.argmax(
                        P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                label = np.argmax(P_cls[0, ii, :])

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])

                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= self.config.classifier_regr_std[0]
                ty /= self.config.classifier_regr_std[1]
                tw /= self.config.classifier_regr_std[2]
                th /= self.config.classifier_regr_std[3]

                cx = x + w / 2.
                cy = y + h / 2.
                cx1 = tx * w + cx
                cy1 = ty * h + cy
                w1 = math.exp(tw) * w
                h1 = math.exp(th) * h

                x1 = cx1 - w1 / 2.
                y1 = cy1 - h1 / 2.

                x2 = cx1 + w1 / 2
                y2 = cy1 + h1 / 2

                x1 = int(round(x1))
                y1 = int(round(y1))
                x2 = int(round(x2))
                y2 = int(round(y2))

                bboxes.append([x1, y1, x2, y2])
                probs.append(np.max(P_cls[0, ii, :]))
                labels.append(label)

        if len(bboxes) == 0:
            return old_image

        # 筛选出其中得分高于confidence的框
        labels = np.array(labels)
        probs = np.array(probs)
        boxes = np.array(bboxes, dtype=np.float32)
        boxes[:, 0] = boxes[:, 0] * self.config.rpn_stride / width
        boxes[:, 1] = boxes[:, 1] * self.config.rpn_stride / height
        boxes[:, 2] = boxes[:, 2] * self.config.rpn_stride / width
        boxes[:, 3] = boxes[:, 3] * self.config.rpn_stride / height
        results = np.array(
            self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                       np.array(boxes), self.num_classes - 1,
                                       0.4))

        top_label_indices = results[:, 0]
        top_conf = results[:, 1]
        boxes = results[:, 2:]
        boxes[:, 0] = boxes[:, 0] * old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * old_width
        boxes[:, 3] = boxes[:, 3] * old_height

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = str(top_conf[i])

            left, top, right, bottom = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
예제 #18
0
        print('Epoch {}/{}'.format(i + 1, EPOCH))
        for iteration, batch in enumerate(rpn_train):
            if len(rpn_accuracy_rpn_monitor) == EPOCH_LENGTH and config.verbose:
                mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor))/len(rpn_accuracy_rpn_monitor)
                rpn_accuracy_rpn_monitor = []
                print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(mean_overlapping_bboxes, EPOCH_LENGTH))
                if mean_overlapping_bboxes == 0:
                    print('RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.')
            
            X, Y, boxes = batch[0],batch[1],batch[2]
            
            loss_rpn = model_rpn.train_on_batch(X,Y)
            write_log(callback, ['rpn_cls_loss', 'rpn_reg_loss'], loss_rpn, train_step)
            P_rpn = model_rpn.predict_on_batch(X)
            width,height,_ = np.shape(X[0])
            anchors = get_anchors((66,1),width,height)
            
            # 将预测结果进行解码
            P_rpn[1][...,3] = 1
            anchors[:,1] = 0
            results = bbox_util.detection_out(P_rpn,anchors,1, confidence_threshold=0)
            
            R = results[0][:, 2:]

            X2, Y1, Y2, IouS = calc_iou(R, config, boxes[0], width, height, NUM_CLASSES)

            if X2 is None:
                rpn_accuracy_rpn_monitor.append(0)
                rpn_accuracy_for_epoch.append(0)
                continue
            
예제 #19
0
def get_train_model(config):
    h, w = config.IMAGE_SHAPE[:2]
    if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
        raise Exception("Image size must be dividable by 2 at least 6 times "
                        "to avoid fractions when downscaling and upscaling."
                        "For example, use 256, 320, 384, 448, 512, ... etc. ")

    # 输入进来的图片必须是2的6次方以上的倍数
    input_image = Input(shape=[None, None, config.IMAGE_SHAPE[2]],
                        name="input_image")
    # meta包含了一些必要信息
    input_image_meta = Input(shape=[config.IMAGE_META_SIZE],
                             name="input_image_meta")

    # RPN建议框网络的真实框信息
    input_rpn_match = Input(shape=[None, 1],
                            name="input_rpn_match",
                            dtype=tf.int32)
    input_rpn_bbox = Input(shape=[None, 4],
                           name="input_rpn_bbox",
                           dtype=tf.float32)

    # 种类信息
    input_gt_class_ids = Input(shape=[None],
                               name="input_gt_class_ids",
                               dtype=tf.int32)

    # 框的位置信息
    input_gt_boxes = Input(shape=[None, 4],
                           name="input_gt_boxes",
                           dtype=tf.float32)

    # 标准化到0-1之间
    gt_boxes = Lambda(lambda x: norm_boxes_graph(x,
                                                 K.shape(input_image)[1:3]))(
                                                     input_gt_boxes)

    # mask语义分析信息
    # [batch, height, width, MAX_GT_INSTANCES]
    if config.USE_MINI_MASK:
        input_gt_masks = Input(
            shape=[config.MINI_MASK_SHAPE[0], config.MINI_MASK_SHAPE[1], None],
            name="input_gt_masks",
            dtype=bool)
    else:
        input_gt_masks = Input(
            shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None],
            name="input_gt_masks",
            dtype=bool)

    # 获得Resnet里的压缩程度不同的一些层
    _, C2, C3, C4, C5 = get_resnet(input_image,
                                   stage5=True,
                                   train_bn=config.TRAIN_BN)

    # 组合成特征金字塔的结构
    # P5长宽共压缩了5次
    # Height/32,Width/32,256
    P5 = Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c5p5')(C5)
    # P4长宽共压缩了4次
    # Height/16,Width/16,256
    P4 = Add(name="fpn_p4add")([
        UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
        Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c4p4')(C4)
    ])
    # P4长宽共压缩了3次
    # Height/8,Width/8,256
    P3 = Add(name="fpn_p3add")([
        UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
        Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c3p3')(C3)
    ])
    # P4长宽共压缩了2次
    # Height/4,Width/4,256
    P2 = Add(name="fpn_p2add")([
        UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
        Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c2p2')(C2)
    ])

    # 各自进行一次256通道的卷积,此时P2、P3、P4、P5通道数相同
    # Height/4,Width/4,256
    P2 = Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3),
                padding="SAME",
                name="fpn_p2")(P2)
    # Height/8,Width/8,256
    P3 = Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3),
                padding="SAME",
                name="fpn_p3")(P3)
    # Height/16,Width/16,256
    P4 = Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3),
                padding="SAME",
                name="fpn_p4")(P4)
    # Height/32,Width/32,256
    P5 = Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3),
                padding="SAME",
                name="fpn_p5")(P5)
    # 在建议框网络里面还有一个P6用于获取建议框
    # Height/64,Width/64,256
    P6 = MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)

    # P2, P3, P4, P5, P6可以用于获取建议框
    rpn_feature_maps = [P2, P3, P4, P5, P6]
    # P2, P3, P4, P5用于获取mask信息
    mrcnn_feature_maps = [P2, P3, P4, P5]

    anchors = get_anchors(config, config.IMAGE_SHAPE)
    # 拓展anchors的shape,第一个维度拓展为batch_size
    anchors = np.broadcast_to(anchors, (config.BATCH_SIZE, ) + anchors.shape)
    # 将anchors转化成tensor的形式
    anchors = Lambda(lambda x: tf.Variable(anchors),
                     name="anchors")(input_image)
    # 建立RPN模型
    rpn = build_rpn_model(len(config.RPN_ANCHOR_RATIOS),
                          config.TOP_DOWN_PYRAMID_SIZE)

    rpn_class_logits, rpn_class, rpn_bbox = [], [], []

    # 获得RPN网络的预测结果,进行格式调整,把五个特征层的结果进行堆叠
    for p in rpn_feature_maps:
        logits, classes, bbox = rpn([p])
        rpn_class_logits.append(logits)
        rpn_class.append(classes)
        rpn_bbox.append(bbox)

    rpn_class_logits = Concatenate(axis=1,
                                   name="rpn_class_logits")(rpn_class_logits)
    rpn_class = Concatenate(axis=1, name="rpn_class")(rpn_class)
    rpn_bbox = Concatenate(axis=1, name="rpn_bbox")(rpn_bbox)

    # 此时获得的rpn_class_logits、rpn_class、rpn_bbox的维度是
    # rpn_class_logits : Batch_size, num_anchors, 2
    # rpn_class : Batch_size, num_anchors, 2
    # rpn_bbox : Batch_size, num_anchors, 4
    proposal_count = config.POST_NMS_ROIS_TRAINING

    # Batch_size, proposal_count, 4
    rpn_rois = ProposalLayer(proposal_count=proposal_count,
                             nms_threshold=config.RPN_NMS_THRESHOLD,
                             name="ROI",
                             config=config)([rpn_class, rpn_bbox, anchors])

    active_class_ids = Lambda(lambda x: parse_image_meta_graph(x)[
        "active_class_ids"])(input_image_meta)

    if not config.USE_RPN_ROIS:
        # 使用外部输入的建议框
        input_rois = Input(shape=[config.POST_NMS_ROIS_TRAINING, 4],
                           name="input_roi",
                           dtype=np.int32)
        # Normalize coordinates
        target_rois = Lambda(
            lambda x: norm_boxes_graph(x,
                                       K.shape(input_image)[1:3]))(input_rois)
    else:
        # 利用预测到的建议框进行下一步的操作
        target_rois = rpn_rois
    """找到建议框的ground_truth
    Inputs:
    proposals: [batch, N, (y1, x1, y2, x2)]建议框
    gt_class_ids: [batch, MAX_GT_INSTANCES]每个真实框对应的类
    gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]真实框的位置
    gt_masks: [batch, height, width, MAX_GT_INSTANCES]真实框的语义分割情况

    Returns: 
    rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)]内部真实存在目标的建议框
    target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]每个建议框对应的类
    target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw)]每个建议框应该有的调整参数
    target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width]每个建议框语义分割情况
    """
    rois, target_class_ids, target_bbox, target_mask =\
        DetectionTargetLayer(config, name="proposal_targets")([
            target_rois, input_gt_class_ids, gt_boxes, input_gt_masks])

    # 找到合适的建议框的classifier预测结果
    mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
        fpn_classifier_graph(rois, mrcnn_feature_maps, input_image_meta,
                                config.POOL_SIZE, config.NUM_CLASSES,
                                train_bn=config.TRAIN_BN,
                                fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
    # 找到合适的建议框的mask预测结果
    mrcnn_mask = build_fpn_mask_graph(rois,
                                      mrcnn_feature_maps,
                                      input_image_meta,
                                      config.MASK_POOL_SIZE,
                                      config.NUM_CLASSES,
                                      train_bn=config.TRAIN_BN)

    output_rois = Lambda(lambda x: x * 1, name="output_rois")(rois)

    # Losses
    rpn_class_loss = Lambda(lambda x: rpn_class_loss_graph(*x),
                            name="rpn_class_loss")(
                                [input_rpn_match, rpn_class_logits])
    rpn_bbox_loss = Lambda(lambda x: rpn_bbox_loss_graph(config, *x),
                           name="rpn_bbox_loss")(
                               [input_rpn_bbox, input_rpn_match, rpn_bbox])
    class_loss = Lambda(lambda x: mrcnn_class_loss_graph(*x),
                        name="mrcnn_class_loss")([
                            target_class_ids, mrcnn_class_logits,
                            active_class_ids
                        ])
    bbox_loss = Lambda(lambda x: mrcnn_bbox_loss_graph(*x),
                       name="mrcnn_bbox_loss")(
                           [target_bbox, target_class_ids, mrcnn_bbox])
    mask_loss = Lambda(lambda x: mrcnn_mask_loss_graph(*x),
                       name="mrcnn_mask_loss")(
                           [target_mask, target_class_ids, mrcnn_mask])

    # Model
    inputs = [
        input_image, input_image_meta, input_rpn_match, input_rpn_bbox,
        input_gt_class_ids, input_gt_boxes, input_gt_masks
    ]

    if not config.USE_RPN_ROIS:
        inputs.append(input_rois)
    outputs = [
        rpn_class_logits, rpn_class, rpn_bbox, mrcnn_class_logits, mrcnn_class,
        mrcnn_bbox, mrcnn_mask, rpn_rois, output_rois, rpn_class_loss,
        rpn_bbox_loss, class_loss, bbox_loss, mask_loss
    ]
    model = Model(inputs, outputs, name='mask_rcnn')
    return model
예제 #20
0
    def detect_image(self, image):
        #-------------------------------------#
        #   转换成RGB图片,可以用于灰度图预测。
        #-------------------------------------#
        image = image.convert("RGB")

        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   给原图像进行resize,resize到短边为600的大小上
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)
        photo = np.array(image, dtype=np.float64)

        #-----------------------------------------------------------#
        #   图片预处理,归一化。
        #-----------------------------------------------------------#
        photo = preprocess_input(np.expand_dims(photo, 0))
        rpn_pred = self.model_rpn_get_pred(photo)
        rpn_pred = [x.numpy() for x in rpn_pred]

        #-----------------------------------------------------------#
        #   将建议框网络的预测结果进行解码
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(
            width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width,
                              height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   在获得建议框和共享特征层后,将二者传入classifier中进行预测
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier_get_pred(
            [base_layer, temp_ROIs])
        classifier_pred = [x.numpy() for x in classifier_pred]

        #-------------------------------------------------------------#
        #   利用classifier的预测结果对建议框进行解码,获得预测框
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0]) == 0:
            return old_image

        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
            1)

        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
예제 #21
0
def fit_one_epoch(model_rpn, model_all, epoch, epoch_size, epoch_size_val, gen,
                  genval, Epoch, callback):
    total_loss = 0
    rpn_loc_loss = 0
    rpn_cls_loss = 0
    roi_loc_loss = 0
    roi_cls_loss = 0

    val_toal_loss = 0
    with tqdm(total=epoch_size,
              desc=f'Epoch {epoch + 1}/{Epoch}',
              postfix=dict,
              mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break
            X, Y, boxes = batch[0], batch[1], batch[2]
            P_rpn = model_rpn.predict_on_batch(X)

            height, width, _ = np.shape(X[0])
            base_feature_width, base_feature_height = get_img_output_length(
                width, height)
            anchors = get_anchors([base_feature_width, base_feature_height],
                                  width, height)
            results = bbox_util.detection_out_rpn(P_rpn, anchors)

            roi_inputs = []
            out_classes = []
            out_regrs = []
            for i in range(len(X)):
                R = results[i][:, 1:]
                X2, Y1, Y2 = calc_iou(R, config, boxes[i], NUM_CLASSES)
                roi_inputs.append(X2)
                out_classes.append(Y1)
                out_regrs.append(Y2)

            loss_class = model_all.train_on_batch(
                [X, np.array(roi_inputs)],
                [Y[0], Y[1],
                 np.array(out_classes),
                 np.array(out_regrs)])

            write_log(callback, [
                'total_loss', 'rpn_cls_loss', 'rpn_reg_loss',
                'detection_cls_loss', 'detection_reg_loss'
            ], loss_class, iteration)

            rpn_cls_loss += loss_class[1]
            rpn_loc_loss += loss_class[2]
            roi_cls_loss += loss_class[3]
            roi_loc_loss += loss_class[4]
            total_loss = rpn_loc_loss + rpn_cls_loss + roi_loc_loss + roi_cls_loss

            pbar.set_postfix(
                **{
                    'total': total_loss / (iteration + 1),
                    'rpn_cls': rpn_cls_loss / (iteration + 1),
                    'rpn_loc': rpn_loc_loss / (iteration + 1),
                    'roi_cls': roi_cls_loss / (iteration + 1),
                    'roi_loc': roi_loc_loss / (iteration + 1),
                    'lr': K.get_value(model_rpn.optimizer.lr)
                })
            pbar.update(1)

    print('Start Validation')
    with tqdm(total=epoch_size_val,
              desc=f'Epoch {epoch + 1}/{Epoch}',
              postfix=dict,
              mininterval=0.3) as pbar:
        for iteration, batch in enumerate(genval):
            if iteration >= epoch_size_val:
                break
            X, Y, boxes = batch[0], batch[1], batch[2]
            P_rpn = model_rpn.predict_on_batch(X)

            height, width, _ = np.shape(X[0])
            base_feature_width, base_feature_height = get_img_output_length(
                width, height)
            anchors = get_anchors([base_feature_width, base_feature_height],
                                  width, height)
            results = bbox_util.detection_out_rpn(P_rpn, anchors)

            roi_inputs = []
            out_classes = []
            out_regrs = []
            for i in range(len(X)):
                R = results[i][:, 1:]
                X2, Y1, Y2 = calc_iou(R, config, boxes[i], NUM_CLASSES)
                roi_inputs.append(X2)
                out_classes.append(Y1)
                out_regrs.append(Y2)

            loss_class = model_all.test_on_batch(
                [X, np.array(roi_inputs)],
                [Y[0], Y[1],
                 np.array(out_classes),
                 np.array(out_regrs)])

            val_toal_loss += loss_class[0]
            pbar.set_postfix(**{'total': val_toal_loss / (iteration + 1)})
            pbar.update(1)

    loss_history.append_loss(total_loss / (epoch_size + 1),
                             val_toal_loss / (epoch_size_val + 1))
    print('Finish Validation')
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' %
          (total_loss / (epoch_size + 1), val_toal_loss /
           (epoch_size_val + 1)))

    print('Saving state, iter:', str(epoch + 1))
    model_all.save_weights('logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.h5' %
                           ((epoch + 1), total_loss /
                            (epoch_size + 1), val_toal_loss /
                            (epoch_size_val + 1)))
    return
예제 #22
0
    def generate(self):
        while True:
            shuffle(self.train_lines)
            lines = self.train_lines

            inputs = []
            target0 = []
            target1 = []
            target2 = []
            for annotation_line in lines:
                img, y = self.get_random_data(annotation_line)
                height, width, _ = np.shape(img)

                if len(y) > 0:
                    boxes = np.array(y[:, :4], dtype=np.float32)
                    boxes[:, 0] = boxes[:, 0] / width
                    boxes[:, 1] = boxes[:, 1] / height
                    boxes[:, 2] = boxes[:, 2] / width
                    boxes[:, 3] = boxes[:, 3] / height
                    y[:, :4] = boxes[:, :4]

                anchors = get_anchors(get_img_output_length(width, height),
                                      width, height)
                #---------------------------------------------------#
                #   assignment分为2个部分,它的shape为 :, 5
                #   :, :4      的内容为网络应该有的回归预测结果
                #   :,  4      的内容为先验框是否包含物体,默认为背景
                #---------------------------------------------------#
                assignment = self.bbox_util.assign_boxes(y, anchors)

                classification = assignment[:, 4]
                regression = assignment[:, :]

                #---------------------------------------------------#
                #   对正样本与负样本进行筛选,训练样本总和为256
                #---------------------------------------------------#
                mask_pos = classification[:] > 0
                num_pos = len(classification[mask_pos])
                if num_pos > self.num_regions / 2:
                    val_locs = random.sample(
                        range(num_pos), int(num_pos - self.num_regions / 2))
                    temp_classification = classification[mask_pos]
                    temp_regression = regression[mask_pos]
                    temp_classification[val_locs] = -1
                    temp_regression[val_locs, -1] = -1
                    classification[mask_pos] = temp_classification
                    regression[mask_pos] = temp_regression

                mask_neg = classification[:] == 0
                num_neg = len(classification[mask_neg])
                mask_pos = classification[:] > 0
                num_pos = len(classification[mask_pos])
                if len(classification[mask_neg]) + num_pos > self.num_regions:
                    val_locs = random.sample(
                        range(num_neg),
                        int(num_neg + num_pos - self.num_regions))
                    temp_classification = classification[mask_neg]
                    temp_classification[val_locs] = -1
                    classification[mask_neg] = temp_classification

                inputs.append(np.array(img))
                target0.append(np.reshape(classification, [-1, 1]))
                target1.append(np.reshape(regression, [-1, 5]))
                target2.append(y)

                if len(inputs) == self.Batch_size:
                    tmp_inp = np.array(inputs)
                    tmp_targets = [
                        np.array(target0, np.float32),
                        np.array(target1, np.float32)
                    ]
                    tmp_y = target2
                    yield preprocess_input(tmp_inp), tmp_targets, tmp_y
                    inputs = []
                    target0 = []
                    target1 = []
                    target2 = []
예제 #23
0
 def _get_prior(self):
     data = get_anchors(image_sizes[self.phi])
     return data
예제 #24
0
    #   训练前,请指定好phi和model_path
    #   二者所使用Efficientdet版本要相同
    #-------------------------------------------#
    phi = 0
    annotation_path = '2007_train.txt'

    classes_path = 'model_data/new_classes.txt'
    class_names = get_classes(classes_path)
    NUM_CLASSES = len(class_names)
    #-------------------------------------------#
    #   权值文件的下载请看README
    #-------------------------------------------#
    model_path = "model_data/efficientdet-d0-voc.h5"

    model = Efficientdet(phi, num_classes=NUM_CLASSES)
    priors = get_anchors(image_sizes[phi])
    bbox_util = BBoxUtility(NUM_CLASSES, priors)

    model.load_weights(model_path, by_name=True, skip_mismatch=True)

    # 0.1用于验证,0.9用于训练
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val

    # 训练参数设置
예제 #25
0
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   在这里将图像转换成RGB图像,防止灰度图在预测时报错。
        #   代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB
        #---------------------------------------------------------#
        image = cvtColor(image)
        image_origin = np.array(image, np.uint8)
        #---------------------------------------------------------#
        #   给图像增加灰条,实现不失真的resize
        #   也可以直接resize进行识别
        #---------------------------------------------------------#
        image_data, image_metas, windows = resize_image([np.array(image)],
                                                        self.config)
        #---------------------------------------------------------#
        #   根据当前输入图像的大小,生成先验框
        #---------------------------------------------------------#
        anchors = np.expand_dims(get_anchors(self.config, image_data[0].shape),
                                 0)
        #---------------------------------------------------------#
        #   将图像输入网络当中进行预测!
        #---------------------------------------------------------#
        detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
            [image_data, image_metas, anchors], verbose=0)

        #---------------------------------------------------#
        #   上面获得的预测结果是相对于padding后的图片的
        #   我们需要将预测结果转换到原图上
        #---------------------------------------------------#
        box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
            detections[0], mrcnn_mask[0], image_shape, image_data[0].shape,
            windows[0])

        if box_thre is None:
            return image

        #----------------------------------------------------------------------#
        #   masks_class [image_shape[0], image_shape[1]]
        #   根据每个像素点所属的实例和是否满足门限需求,判断每个像素点的种类
        #----------------------------------------------------------------------#
        masks_class = masks_sigmoid * (class_ids[None, None, :] + 1)
        masks_class = np.reshape(masks_class,
                                 [-1, np.shape(masks_sigmoid)[-1]])
        masks_class = np.reshape(
            masks_class[np.arange(np.shape(masks_class)[0]),
                        np.reshape(masks_arg, [-1])],
            [image_shape[0], image_shape[1]])

        #---------------------------------------------------------#
        #   设置字体与边框厚度
        #---------------------------------------------------------#
        scale = 0.6
        thickness = int(
            max((image.size[0] + image.size[1]) // self.IMAGE_MAX_DIM, 1))
        font = cv2.FONT_HERSHEY_DUPLEX
        color_masks = self.colors[masks_class].astype('uint8')
        image_fused = cv2.addWeighted(color_masks,
                                      0.4,
                                      image_origin,
                                      0.6,
                                      gamma=0)
        for i in range(np.shape(class_ids)[0]):
            top, left, bottom, right = np.array(box_thre[i, :], np.int32)

            #---------------------------------------------------------#
            #   获取颜色并绘制预测框
            #---------------------------------------------------------#
            color = self.colors[class_ids[i] + 1].tolist()
            cv2.rectangle(image_fused, (left, top), (right, bottom), color,
                          thickness)

            #---------------------------------------------------------#
            #   获得这个框的种类并写在图片上
            #---------------------------------------------------------#
            class_name = self.class_names[class_ids[i]]
            print(class_name, top, left, bottom, right)
            text_str = f'{class_name}: {class_thre[i]:.2f}'
            text_w, text_h = cv2.getTextSize(text_str, font, scale, 1)[0]
            cv2.rectangle(image_fused, (left, top),
                          (left + text_w, top + text_h + 5), color, -1)
            cv2.putText(image_fused, text_str, (left, top + 15), font, scale,
                        (255, 255, 255), 1, cv2.LINE_AA)

        image = Image.fromarray(np.uint8(image_fused))
        return image
예제 #26
0
    def get_FPS(self, image, test_interval):
        #-------------------------------------#
        #   转换成RGB图片,可以用于灰度图预测。
        #-------------------------------------#
        image = image.convert("RGB")
        
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
    
        #---------------------------------------------------------#
        #   给原图像进行resize,resize到短边为600的大小上
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width,height], Image.BICUBIC)
        photo = np.array(image,dtype = np.float64)

        #-----------------------------------------------------------#
        #   图片预处理,归一化。
        #-----------------------------------------------------------#
        photo = preprocess_input(np.expand_dims(photo,0))
        rpn_pred = self.model_rpn.predict(photo)

        #-----------------------------------------------------------#
        #   将建议框网络的预测结果进行解码
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width, height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)
        
        #-------------------------------------------------------------#
        #   在获得建议框和共享特征层后,将二者传入classifier中进行预测
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict([base_layer, temp_ROIs])
        
        #-------------------------------------------------------------#
        #   利用classifier的预测结果对建议框进行解码,获得预测框
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0])>0:
            results = np.array(results[0])
            boxes = results[:, :4]
            top_conf = results[:, 4]
            top_label_indices = results[:, 5]
            boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
            boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        t1 = time.time()
        for _ in range(test_interval):
            rpn_pred = self.model_rpn.predict(photo)

            #-----------------------------------------------------------#
            #   将建议框网络的预测结果进行解码
            #-----------------------------------------------------------#
            base_feature_width, base_feature_height = self.get_img_output_length(width, height)
            anchors = get_anchors([base_feature_width, base_feature_height], width, height)
            rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)
            
            #-------------------------------------------------------------#
            #   在获得建议框和共享特征层后,将二者传入classifier中进行预测
            #-------------------------------------------------------------#
            base_layer = rpn_pred[2]
            proposal_box = np.array(rpn_results)[:, :, 1:]
            temp_ROIs = np.zeros_like(proposal_box)
            temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
            classifier_pred = self.model_classifier.predict([base_layer, temp_ROIs])
            
            #-------------------------------------------------------------#
            #   利用classifier的预测结果对建议框进行解码,获得预测框
            #-------------------------------------------------------------#
            results = self.bbox_util.detection_out_classifier(classifier_pred, proposal_box, self.config, self.confidence)

            if len(results[0])>0:
                results = np.array(results[0])
                boxes = results[:, :4]
                top_conf = results[:, 4]
                top_label_indices = results[:, 5]
                boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
                boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
예제 #27
0
 def _get_prior(self):
     data = get_anchors()
     return data
예제 #28
0
        local_rank      = 0

    if pretrained:
        if distributed:
            if local_rank == 0:
                download_weights(backbone)  
            dist.barrier()
        else:
            download_weights(backbone)

    #----------------------------------------------------#
    #   获取classes和anchor
    #----------------------------------------------------#
    class_names, num_classes = get_classes(classes_path)
    num_classes += 1
    anchors = get_anchors(input_shape, anchors_size, backbone)

    model = SSD300(num_classes, backbone, pretrained)
    if not pretrained:
        weights_init(model)
    if model_path != '':
        #------------------------------------------------------#
        #   权值文件请看README,百度网盘下载
        #------------------------------------------------------#
        if local_rank == 0:
            print('Load weights {}.'.format(model_path))
        
        #------------------------------------------------------#
        #   根据预训练权重的Key和模型的Key进行加载
        #------------------------------------------------------#
        model_dict      = model.state_dict()
예제 #29
0
    def detect_image(self, image_id, image):
        self.confidence = 0.01
        f = open("./input/detection-results/" + image_id + ".txt", "w")

        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   给原图像进行resize,resize到短边为600的大小上
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)
        photo = np.array(image, dtype=np.float64)

        #-----------------------------------------------------------#
        #   图片预处理,归一化。
        #-----------------------------------------------------------#
        photo = preprocess_input(np.expand_dims(photo, 0))
        rpn_pred = self.model_rpn.predict(photo)

        #-----------------------------------------------------------#
        #   将建议框网络的预测结果进行解码
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(
            width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width,
                              height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   在获得建议框和共享特征层后,将二者传入classifier中进行预测
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict(
            [base_layer, temp_ROIs])

        #-------------------------------------------------------------#
        #   利用classifier的预测结果对建议框进行解码,获得预测框
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0]) == 0:
            return old_image

        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = str(top_conf[i])

            left, top, right, bottom = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
예제 #30
0
    #   这里的SSD512不是原版的SSD512。
    #   原版的SSD512的比SSD300多一个预测层;
    #   修改起来比较麻烦,所以我只是修改了输入大小
    #   这样也可以用比较大的图片训练,对于小目标有好处
    #----------------------------------------------------#
    input_shape = [300, 300, 3]

    #----------------------------------------------------#
    #   可用于设定先验框的大小,默认的anchors_size
    #   是根据voc数据集设定的,大多数情况下都是通用的!
    #   如果想要检测小物体,可以修改anchors_size
    #   一般调小浅层先验框的大小就行了!因为浅层负责小物体检测!
    #   比如anchors_size = [21,45,99,153,207,261,315]
    #----------------------------------------------------#
    anchors_size = [30, 60, 111, 162, 213, 264, 315]
    priors = get_anchors((input_shape[0], input_shape[1]), anchors_size)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)

    model = SSD300(input_shape, NUM_CLASSES, anchors_size)
    #------------------------------------------------------#
    #   权值文件请看README,百度网盘下载
    #   训练自己的数据集时提示维度不匹配正常
    #   预测的东西都不一样了自然维度不匹配
    #------------------------------------------------------#
    model_path = 'model_data/mobilenet_ssd_weights.h5'
    model.load_weights(model_path, by_name=True, skip_mismatch=True)

    #-------------------------------------------------------------------------------#
    #   训练参数的设置
    #   logging表示tensorboard的保存地址
    #   checkpoint用于设置权值保存的细节,period用于修改多少epoch保存一次