def train_kitti():
    # config for data augmentation
    cfg = config.Config()

    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: this is the only file that should need changing to train on other data
    cfg.model_path = './model/kitti_frcnn_last.hdf5'
    cfg.simple_label_file = 'simple_label.txt'

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)
    #print(len(all_images))

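    # make sure a background class exists; it must map to the last index,
    # since the detector treats the final class as 'bg'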
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    #print(len(train_imgs))
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 cfg,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(cfg.base_net_weights))
        model_rpn.load_weights(cfg.base_net_weights, by_name=True)
        model_classifier.load_weights(cfg.base_net_weights, by_name=True)
    except Exception as e:
        print(e)
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
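    # model_all is never trained directly; it only holds the RPN and classifier
    # weights so they can be saved/loaded together, hence the placeholder loss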
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 1000
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')

    vis = True

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )

                X, Y, img_data = next(data_gen_train)

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

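                # turn the raw RPN outputs into box proposals: apply the
                # predicted anchor regressions, then NMS (keep at most 300)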
                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True,
                                                overlap_thresh=0.6,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

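                # the last column of Y1 is the 'bg' one-hot flag:
                # 1 -> background (negative) RoI, 0 -> positive RoI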
                # np.where returns a tuple of index arrays; keep the array itself
                neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                pos_samples = np.where(Y1[0, :, -1] == 0)[0]

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except ValueError:
                        # not enough negatives to sample without replacement
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    # keep sel_samples a list so the RoI axis is preserved when indexing below
                    if np.random.randint(0, 2):
                        sel_samples = [random.choice(selected_neg_samples)]
                    else:
                        sel_samples = [random.choice(selected_pos_samples)]

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
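
# The pos/neg RoI balancing in the loop above is the subtle step. Below is a
# standalone sketch of the same sampling idea (illustrative, not from the
# original script); `bg_flags` stands for the last column of Y1, where 1
# marks a background RoI and 0 a positive one.
import numpy as np

def sample_rois(bg_flags, num_rois=32):
    pos = np.where(bg_flags == 0)[0]
    neg = np.where(bg_flags == 1)[0]
    # up to half the mini-batch comes from positives, sampled without replacement
    n_pos = min(len(pos), num_rois // 2)
    sel_pos = list(np.random.choice(pos, n_pos, replace=False)) if n_pos else []
    # fill the remainder with negatives, sampling with replacement if too few exist
    n_neg = num_rois - len(sel_pos)
    sel_neg = (list(np.random.choice(neg, n_neg, replace=len(neg) < n_neg))
               if len(neg) else [])
    return sel_pos + sel_neg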
Example #2
img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C_deepfashion.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

# DeepFashion model
# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers_df = nn.nn_base(img_input, trainable=False)

# define the RPN, built on the base layers
num_anchors_df = len(C_deepfashion.anchor_box_scales) * len(
    C_deepfashion.anchor_box_ratios)
rpn_layers_df = nn.rpn(shared_layers_df, num_anchors_df)

classifier_df = nn.classifier(feature_map_input,
                              roi_input,
                              C_deepfashion.num_rois,
                              nb_classes=len(class_mapping_df),
                              trainable=True)

model_rpn_df = Model(img_input, rpn_layers_df)
model_classifier_only_df = Model([feature_map_input, roi_input], classifier_df)

model_classifier_df = Model([feature_map_input, roi_input], classifier_df)

# print('Loading weights from {}'.format(C.model_path))
model_rpn_df.load_weights("model_frcnn.hdf5_epoch_9", by_name=True)
model_classifier_df.load_weights("model_frcnn.hdf5_epoch_9", by_name=True)

model_rpn_df.compile(optimizer='sgd', loss='mse')
model_classifier_df.compile(optimizer='sgd', loss='mse')
Example #3
if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
else:
    input_shape_img = (None, None, 3)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(None, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(shared_layers,
                           roi_input,
                           C.num_rois,
                           nb_classes=len(classes_count),
                           trainable=True)

model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)

# this is a model that holds both the RPN and the classifier, used to load/save weights for the models
model_all = Model([img_input, roi_input], rpn[:2] + classifier)

try:
    print('loading weights from {}'.format(C.base_net_weights))
    model_rpn.load_weights(C.base_net_weights, by_name=True)
    model_classifier.load_weights(C.base_net_weights, by_name=True)
except Exception as e:
    print(e)
    print('Could not load pretrained model weights. Weights can be found in the keras application folder '
          'https://github.com/fchollet/keras/tree/master/keras/applications')
Example #4
feature_map_input = Input(shape=input_shape_features)

roi_input = Input(shape=(num_rois, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

# classifier, uses base layers + proposals
print(class_mapping)

classifier = nn.classifier(feature_map_input,
                           roi_input,
                           num_rois,
                           nb_classes=len(class_mapping))

model_rpn = Model(img_input, rpn + [shared_layers])
model_classifier = Model([feature_map_input, roi_input], classifier)

weights_path = 'model_frcnn.hdf5'
model_rpn.load_weights(weights_path, by_name=True)
model_classifier.load_weights(weights_path, by_name=True)

model_rpn.compile(optimizer='sgd', loss='mse')
model_classifier.compile(optimizer='sgd', loss='mse')

all_imgs = []

classes = {}
Example #5
def measure_map(config_output_filename, real_model_path):
    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    # img_path = options.test_path
    img_path = '/home/comp/e4252392/map4frcnn.txt'

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    # C.num_rois = int(options.num_rois)
    C.num_rois = 32

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (1024, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    model_rpn.load_weights(real_model_path, by_name=True)
    model_classifier.load_weights(real_model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs, _, _ = get_data(img_path)
    # test_imgs = [s for s in all_imgs if s['imageset'] == 'test']
    test_imgs = [s for s in all_imgs]

    T = {}
    P = {}
    print('Calculating mAP')
    st = time.time()
    for idx, img_data in enumerate(test_imgs):
        # print('{}/{}'.format(idx,len(test_imgs)))
        # st = time.time()
        filepath = img_data['filepath']

        img = cv2.imread(filepath)

        X, fx, fy = format_img(img, C)

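        # format_img returns channels-first data; move channels last for TensorFlow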
        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                # pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):

                if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except:
                    pass
                bboxes[cls_name].append(
                    [16 * x, 16 * y, 16 * (x + w), 16 * (y + h)])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        all_dets = []
        album_ap = 0.0
        logo_ap = 0.0
        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.5)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]
                det = {
                    'x1': x1,
                    'x2': x2,
                    'y1': y1,
                    'y2': y2,
                    'class': key,
                    'prob': new_probs[jk]
                }
                all_dets.append(det)

        # print('Elapsed time = {}'.format(time.time() - st))
        t, p = get_map(all_dets, img_data['bboxes'], (fx, fy))
        for key in t.keys():
            if key not in T:
                T[key] = []
                P[key] = []
            T[key].extend(t[key])
            P[key].extend(p[key])
        all_aps = []
        for key in T.keys():
            ap = average_precision_score(T[key], P[key])
            # print('{} AP: {}'.format(key, ap))
            all_aps.append(ap)

            if idx == len(test_imgs) - 1:
                if key == 'album':
                    album_ap = ap
                if key == 'logo':
                    logo_ap = ap

        # print('mAP = {}'.format(np.mean(np.array(all_aps))))
        if idx == len(test_imgs) - 1:
            mAP = np.mean(np.array(all_aps))
            print('Elapsed time = {}'.format(time.time() - st))
            print('album ap = {}'.format(album_ap))
            print('logo ap = {}'.format(logo_ap))
            print('mAP = {}'.format(mAP))

    return [album_ap, logo_ap, mAP]
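
# Note: the hard-coded 16 above is the feature-map stride of the ResNet base
# (exposed as C.rpn_stride elsewhere in this document). A tiny illustrative
# helper, not from the original script, making the stride explicit:
def fmap_box_to_image(x, y, w, h, stride=16):
    # (x, y, w, h) on the feature map -> (x1, y1, x2, y2) on the resized image
    return stride * x, stride * y, stride * (x + w), stride * (y + h)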
Example #6
if K.image_dim_ordering() == 'th':
	input_shape_img = (3, None, None)
else:
	input_shape_img = (None, None, 3)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(shared_layers, roi_input, C.num_rois, nb_classes=len(classes_count), trainable=True)

model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)

# this is a model that holds both the RPN and the classifier, used to load/save weights for the models
model_all = Model([img_input, roi_input], rpn[:2] + classifier)

try:
	print('loading weights from {}'.format(C.base_net_weights))
	model_rpn.load_weights(C.base_net_weights, by_name=True)
	model_classifier.load_weights(C.base_net_weights, by_name=True)
except:
	print('Could not load pretrained model weights. Weights can be found at {} and {}'.format(
		'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5',
		'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'))
Example #7
def predict(args_):
    path = args_.path
    with open('config.pickle', 'rb') as f_in:
        cfg = pickle.load(f_in)
    cfg.use_horizontal_flips = False
    cfg.use_vertical_flips = False
    cfg.rot_90 = False

    class_mapping = cfg.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    # TensorFlow input format is (None, None, 3)
    input_shape_img = (None, None, 3)
    # for resnet, num_features = 1024
    # for vgg, num_features = 512
    input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(cfg.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    # build the RPN output
    # number of anchors
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    # build the classifier output; arguments: feature-map input, RoI proposals
    # input, number of RoIs, number of classes, and the trainable flag
    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    # build the models
    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    # load the weights
    print('Loading weights from {}'.format(cfg.model_path))
    # self.model_path = 'model_trained/model_frcnn.vgg.hdf5'
    # the network here is resnet, so this model_path is confusing???
    model_rpn.load_weights(cfg.model_path, by_name=True)
    model_classifier.load_weights(cfg.model_path, by_name=True)

    # test phase: no training is done, but Keras requires loaded models to be
    # compiled before use, so any loss (mse) will do
    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    Ap = [[] for i in range(len(class_mapping))]
    print("Appp", Ap)
    if os.path.isdir(path):
        for idx, img_name in enumerate(sorted(os.listdir(path))):
            if not img_name.lower().endswith(
                ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
                continue
            print(img_name)
            predict_single_image(os.path.join(path, img_name), model_rpn,
                                 model_classifier_only, cfg, class_mapping, Ap)
    elif os.path.isfile(path):
        print('predict image from {}'.format(path))
        predict_single_image(path, model_rpn, model_classifier_only, cfg,
                             class_mapping, Ap)
    print("Ap:")
    print(Ap)
    Acc = []
    for i in range(len(class_mapping)):
        # mean of the per-image AP values for this class; 0 if none were recorded
        if len(Ap[i]) != 0:
            Acc.append(sum(Ap[i]) / len(Ap[i]))
        else:
            Acc.append(0)
    for i in range(len(class_mapping)):
        print(class_mapping[i] + "acc:{}".format(Acc[i]))
Example #8
def configure_keras_models(config):
    """Configures Keras.

    Args:
        nn (Neural Net): A keras NN.
        config (object): The config file.
    Returns:
        tuple: A tuple of three classifiers: (1) the rpn classifier, (2) the
               classifier (only), (3) the classifier.
    """
    # Import the correct NN according to config
    # This must be called within main so that import is called
    if config.network == 'resnet50':
        import keras_frcnn.resnet as nn
        num_features = 1024
    elif config.network == 'vgg':
        import keras_frcnn.vgg as nn
        num_features = 512

    # Configure Keras
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    # Config inputs
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(config.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(config.anchor_box_scales) * len(config.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    # Classifier to return
    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               config.num_rois,
                               nb_classes=len(config.class_mapping),
                               trainable=True)

    print("Configuring Keras with:")
    print("  - Neural Network: %s..." % config.network)
    print("  - Weights loaded from: %s" % config.model_path)
    print("  - Dimension Ordering: %s" % K.image_dim_ordering())
    print("  - Num Features: %s" % num_features)
    print("  - Num rois: %s" % config.num_rois)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    model_rpn.load_weights(config.model_path, by_name=True)
    model_classifier.load_weights(config.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    return (model_rpn, model_classifier_only, model_classifier)
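
# A minimal, hypothetical call site for configure_keras_models; the pickle
# filename follows the config.pickle convention of the other examples here
# and is an assumption, not part of the original function:
import pickle

with open('config.pickle', 'rb') as f_in:
    cfg = pickle.load(f_in)

model_rpn, model_classifier_only, model_classifier = configure_keras_models(cfg)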
Example #9
def upload_file():
    print("request is ", request.files)
    st = time.time()
    content_length = request.content_length
    print(f"Content_length : {content_length}")
    print("data type is ", type(request))
    print("data type of request files  ", type(request.files))
    data_dict = request.form.to_dict()
    #print(type(data_dict))
    #print(data_dict['file'])
    data = (data_dict['file'].split(',')[1])
    #print(len(data_dict))
    #print(data)
    imgdata = base64.b64decode(data)
    print("imagedata type is", type(imgdata))
    img2 = Image.open(io.BytesIO(imgdata))
    print(type(img2))
    #img2.show()
    #img = cv2.imread(img2)
    #print('opencv type' , type(img))
    #print(type(img))
    a = np.array(img2.getdata()).astype(np.float64)
    #print('datatype of w ', w.dtype)
    #b = np.ones(172800,3)
    #a = np.concatenate((w,b), axis=None)
    print('type of data to model ', type(a))
    print('shape of data from frontend', a.shape)
    #r, c = a.shape
    #print('Value of r', r)
    if a.shape == (480000, 3):
        data = a.reshape(600, 800, 3)
    else:
        data = a.reshape(480, 640, 3)

    st = time.time()

    parser = OptionParser()

    parser.add_option(
        "-n",
        "--num_rois",
        type="int",
        dest="num_rois",
        help="Number of ROIs per iteration. Higher means more memory use.",
        default=4)
    parser.add_option(
        "--config_filename",
        dest="config_filename",
        help=
        "Location to read the metadata related to the training (generated when training).",
        default="config.pickle")
    parser.add_option("--network",
                      dest="network",
                      help="Base network to use. Supports vgg or resnet50.",
                      default='vgg')

    (options, args) = parser.parse_args()

    config_output_filename = options.config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(options.num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs = []

    classes = {}

    bbox_threshold = 0.6

    visualise = True

    #if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
    #	continue
    #print(img_name)
    #filepath = os.path.join(img_path,img_name)

    img = data

    #cv2.imshow('img', img)
    #cv2.waitKey(0)

    X, ratio = format_img(img, C)

    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    R = roi_helpers.rpn_to_roi(Y1,
                               Y2,
                               C,
                               K.image_dim_ordering(),
                               overlap_thresh=0.6)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}

    for jk in range(R.shape[0] // C.num_rois + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                              axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0] // C.num_rois:
            #pad R
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):

            if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                    P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            cls_num = np.argmax(P_cls[0, ii, :])
            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except:
                pass
            bboxes[cls_name].append([
                C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                C.rpn_stride * (y + h)
            ])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    all_dets = []

    for key in bboxes:
        bbox = np.array(bboxes[key])

        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=0.6)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]

            (real_x1, real_y1, real_x2,
             real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(
                img, (real_x1, real_y1), (real_x2, real_y2),
                (int(class_to_color[key][0]), int(
                    class_to_color[key][1]), int(class_to_color[key][2])), 2)

            textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))

            (retval, baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1,
                                                 1)
            textOrg = (real_x1, real_y1 - 0)

            cv2.rectangle(
                img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                (0, 0, 0), 2)
            cv2.rectangle(
                img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1,
                        (0, 0, 0), 1)

    print('Elapsed time = {}'.format(time.time() - st))
    print('number of windows detected', len(all_dets))
    print(all_dets)
    r = len(all_dets)
    img3 = normalize(img)
    #plt.imshow(img)
    #cv2.imshow('img3', img3)
    #cv2.waitKey(0)

    K.clear_session()
    #data = process(data)
    #print('after reshape',data.shape)
    im2 = Image.fromarray(img.astype("uint8"), "RGB")
    print("im2 data type is ", type(im2))
    #to_frontend = (" ".join(str(x) for x in data))
    db = data.tobytes()
    print('type of data to database :', type(db))
    todb = insertBLOB('Image007', db)
    print('final data shape fed to model : ', data.shape)
    # ImageFile img = db.b64encode()
    # with open("t.png", "rb") as imageFile:
    # str = base64.b64encode(imageFile.read())
    #cv2.imshow('image', cv2.cvtColor(data, cv2.COLOR_BGR2GRAY))
    #cv2.waitKey()
    #str = base64.b64encode(data)
    # return jsonify(to_frontend, r)

    #img = Image.open( 'C:\Window Counter_Project\Flickr\Window_101 (131).jpg' )
    #img.load()

    #data = np.asarray( img, dtype="int32" )
    #im = Image.fromarray(data.astype("uint8"))
    #im.show()  # uncomment to look at the image
    rawBytes = io.BytesIO()
    print(rawBytes)
    im2.save(rawBytes, "jpeg")
    #im2.show()
    print('type of im2 is ', type(im2))
    rawBytes.seek(0)  # return to the start of the file
    response_obj = {
        'count': r,
        'image':
        "data:image/jpeg;base64," + str(base64.b64encode(rawBytes.read()))
    }
    #print("response is", type(response_obj))
    return jsonify(Data=response_obj)
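
# A self-contained sketch of the base64 round trip used above: decode the
# posted data URL into a numpy array, then re-encode an image for the JSON
# response. The helper names are illustrative, not from the original app.
import base64
import io

import numpy as np
from PIL import Image

def decode_data_url(data_url):
    # strip the 'data:image/...;base64,' prefix, then decode
    raw = base64.b64decode(data_url.split(',')[1])
    return np.array(Image.open(io.BytesIO(raw)))

def encode_jpeg(arr):
    buf = io.BytesIO()
    Image.fromarray(arr.astype('uint8'), 'RGB').save(buf, 'jpeg')
    return 'data:image/jpeg;base64,' + base64.b64encode(buf.getvalue()).decode('ascii')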
Example #10
def work(path, curelist, textedit):
    #print("I'm hearing too")
    #textedit.append("I'm hearing too")
    sys.setrecursionlimit(40000)
    test_path = path

    parser = OptionParser()

    parser.add_option("-p",
                      "--path",
                      dest="test_path",
                      help="Path to test data.",
                      default=test_path)
    parser.add_option(
        "-n",
        "--num_rois",
        dest="num_rois",
        help="Number of ROIs per iteration. Higher means more memory use.",
        default=256)
    parser.add_option(
        "--config_filename",
        dest="config_filename",
        help=
        "Location to read the metadata related to the training (generated when training).",
        default="config.pickle")
    parser.add_option("-o",
                      "--parser",
                      dest="parser",
                      help="Parser to use. One of simple or pascal_voc",
                      default="pascal_voc"),  #default="pascal_voc"

    (options, args) = parser.parse_args()

    if not options.test_path:  # if filename is not given
        parser.error(
            'Error: path to test data must be specified. Pass --path to command line'
        )

    if options.parser == 'pascal_voc':
        from keras_frcnn.pascal_voc_parser import get_data
    elif options.parser == 'simple':
        from keras_frcnn.simple_parser import get_data
    else:
        raise ValueError(
            "Command line option parser must be one of 'pascal_voc' or 'simple'"
        )

    config_output_filename = options.config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = options.test_path

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(options.num_rois)

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (1024, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    #####
    all_imgs, _, _ = get_data(options.test_path)
    test_imgs = [s for s in all_imgs if s['imageset'] == 'test']

    #test_imgs=test_imgs1[3111:4056]

    T = {}
    P = {}
    for idx, img_data in enumerate(test_imgs):
        print('{}/{}'.format(idx, len(test_imgs)))
        #textedit.append('{}/{}'.format(idx,len(test_imgs)))

        st = time.time()
        filepath = img_data['filepath']
        print(filepath)
        img = cv2.imread(filepath)

        X, fx, fy = format_img(img, C)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.5)  ##0.7

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                # pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):

                if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except:
                    pass
                bboxes[cls_name].append(
                    [16 * x, 16 * y, 16 * (x + w), 16 * (y + h)])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        all_dets = []

        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.45)  ###0.5
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]
                det = {
                    'x1': x1,
                    'x2': x2,
                    'y1': y1,
                    'y2': y2,
                    'class': key,
                    'prob': new_probs[jk]
                }
                all_dets.append(det)

        print('Elapsed time = {}'.format(time.time() - st))
        cursor = textedit.textCursor()
        cursor.movePosition(QtGui.QTextCursor.End)
        cursor.insertText('{}/{}'.format(idx, len(test_imgs)))
        cursor.insertText("\r\n")
        cursor.insertText('Elapsed time = {}'.format(time.time() - st))
        cursor.insertText("\r\n")
        # textedit.append('Elapsed time = {}'.format(time.time() - st))
        textedit.setTextCursor(cursor)
        textedit.ensureCursorVisible()

        t, p = get_map(all_dets, img_data['bboxes'], (fx, fy))
        for key in t.keys():
            if key not in T:
                T[key] = []
                P[key] = []
            T[key].extend(t[key])
            P[key].extend(p[key])
    p1 = []
    t1 = []
    # collect predictions and ground truth for the three classes of interest
    for cls in ("airbase", "harbour", "island"):
        p1.append(P[cls])
        t1.append(T[cls])

    prefastr = []
    recfastr = []
    apfastr = []

    for m in range(len(p1)):
        p11 = np.zeros(len(p1[m]))
        for i in range(len(p1[m])):
            if p1[m][i] > 0.45:
                p11[i] = 1
            else:
                p11[i] = 0
        p_p1 = p11
        t_t1 = t1[m]

        false_positives = np.zeros((0, ))
        true_positives = np.zeros((0, ))
        scores = np.zeros((0, ))
        num_annotations = 0.0
        nump = len(p_p1)

        # for n in range (1915):#(len(p_p1)):
        for n in range(len(p_p1)):
            if t_t1[n] == 1 and p_p1[n] == 1:
                true_positives = np.append(true_positives, 1)
                false_positives = np.append(false_positives, 0)
                scores = np.append(scores, p1[m][n])

            if t_t1[n] == 0 and p_p1[n] == 1:
                true_positives = np.append(true_positives, 0)
                false_positives = np.append(false_positives, 1)
                scores = np.append(scores, p1[m][n])

            # if t_t1[n]==1 and p_p1[n]==0:
            #   true_positives = np.append(true_positives, 0)
            #  false_positives = np.append(false_positives, 0)
            # scores = np.append(scores, p1[m][n])

            if t_t1[n] == 1:
                num_annotations = num_annotations + 1

        descending_indices = np.argsort(-scores)
        true_positives = true_positives[descending_indices]
        false_positives = false_positives[descending_indices]

        # compute false positives and true positives
        true_positives = np.cumsum(true_positives)
        false_positives = np.cumsum(false_positives)

        # compute recall and precision
        recall1 = true_positives / num_annotations
        precision1 = true_positives / np.maximum(
            true_positives + false_positives,
            np.finfo(np.float64).eps)

        average_precision = compute_ap(recall1, precision1)

        prefastr.append(precision1)
        recfastr.append(recall1)
        apfastr.append(average_precision)
    for i in range(0, len(recfastr)):
        curelist[i].setData(recfastr[i], prefastr[i])
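
# compute_ap is called above but not defined in this excerpt. A standard
# all-point interpolated AP (in the style of the VOC devkit) could look like
# the sketch below; this is an assumption, not necessarily the implementation
# actually used here.
import numpy as np

def compute_ap(recall, precision):
    # add sentinels, then make the precision envelope monotonically decreasing
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    # integrate precision over the points where recall changes
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])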
Example #11
def operation():

    #After prediction
    K.clear_session()

    sys.setrecursionlimit(40000)

    parser = OptionParser()

    parser.add_option("-p",
                      "--path",
                      dest="test_path",
                      default="images",
                      help="Path to test data.")
    parser.add_option(
        "-n",
        "--num_rois",
        dest="num_rois",
        help="Number of ROIs per iteration. Higher means more memory use.",
        default=32)
    parser.add_option(
        "--config_filename",
        dest="config_filename",
        help=
        "Location to read the metadata related to the training (generated when training).",
        default="config.pickle")
    parser.add_option("--network",
                      dest="network",
                      help="Base network to use. Supports vgg or resnet50.",
                      default='resnet50')

    (options, args) = parser.parse_args()

    if not options.test_path:  # if filename is not given
        parser.error(
            'Error: path to test data must be specified. Pass --path to command line'
        )

    config_output_filename = options.config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = options.test_path

    def format_img_size(img, C):
        """ formats the image size based on config """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
        """ formats the image channels based on config """
        img = img[:, :, (2, 1, 0)]
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):

        real_x1 = int(round(x1 // ratio))
        real_y1 = int(round(y1 // ratio))
        real_x2 = int(round(x2 // ratio))
        real_y2 = int(round(y2 // ratio))

        return (real_x1, real_y1, real_x2, real_y2)

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(options.num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs = []

    classes = {}

    bbox_threshold = 0.8

    visualise = True
    detected_objects = []
    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        if not img_name.lower().endswith(
            ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
            continue
        print(img_name)
        st = time.time()
        filepath = os.path.join(img_path, img_name)

        img = cv2.imread(filepath)

        X, ratio = format_img(img, C)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}
        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                # pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):

                if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                        P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except:
                    pass
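                # scale the ROI from feature-map coordinates back to the
                # (resized) input image using the RPN stride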
                bboxes[cls_name].append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        all_dets = []
        item = []
        for key in bboxes:
            bbox = np.array(bboxes[key])
            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.5)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                cv2.rectangle(
                    img, (real_x1, real_y1), (real_x2, real_y2),
                    (int(class_to_color[key][0]), int(class_to_color[key][1]),
                     int(class_to_color[key][2])), 2)

                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))

                (retval, baseLine) = cv2.getTextSize(textLabel,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     1, 1)
                textOrg = (real_x1, real_y1 - 0)

                cv2.rectangle(
                    img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                    (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                    (0, 0, 0), 2)
                cv2.rectangle(
                    img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                    (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                    (255, 255, 255), -1)
                cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX,
                            1, (0, 0, 0), 1)
                # print(textLabel)
                # print(real_x1, real_y1, real_x2, real_y2)
                if (100 * new_probs[jk]) > 95:
                    item.append([key, [real_x1, real_y1, real_x2, real_y2]])
        print(all_dets)
        objects.append([[idx], [item]])

        # print(objects)

        # print('Elapsed time = {}'.format(time.time() - st))

        # cv2.imshow('img', img)
        # cv2.waitKey(0)
        cv2.imwrite('./results_imgs/{}.png'.format(idx), img)
    print("=======================")

    return objects
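# Note: each element of the returned list has the form [[image_index], [items]],
# where items is a list of [class_name, [x1, y1, x2, y2]] entries for detections
# scoring above 95%.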
Example #12
def testModel(config_filename='config_ui.pickle'):

    st.markdown('## Starting validation of test data set')
    sys.setrecursionlimit(40000)

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    K.set_session(sess)

    test_path = 'test'
    num_rois = 4

    config_output_filename = config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = test_path

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    st.write('Class Mapping', class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }

    C.num_rois = int(num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_data_format() == 'channels_first':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    ## Defining Model

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    st.write(f'Loading weights from {C.model_path}')
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs = []

    classes = {}

    # a few hyperparameters for bbox selection
    bbox_threshold = 0.9

    visualise = True

    set_Overlap_threshold = 0.1
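    # NMS overlap threshold: a low value such as 0.1 suppresses almost all
    # overlapping same-class boxes, keeping only the strongest detection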
    progress_bar = st.progress(0.0)
    with st.spinner('Wait for sample test images...'):

        for idx, img_name in enumerate(sorted(os.listdir(img_path))):
            progress_bar.progress((idx + 0.1) / len(os.listdir(img_path)))
            if not img_name.lower().endswith(
                ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
                continue
            st.write(img_name)
            stTime = time.time()
            filepath = os.path.join(img_path, img_name)

            img = cv2.imread(filepath)

            X, ratio = format_img(img, C)

            if K.image_data_format() == 'channels_last':
                X = np.transpose(X, (0, 2, 3, 1))

            # get the feature maps and output from the RPN
            [Y1, Y2, F] = model_rpn.predict(X)

            R = roi_helpers.rpn_to_roi(Y1,
                                       Y2,
                                       C,
                                       K.image_data_format(),
                                       overlap_thresh=0.7)  #0.7

            # convert from (x1,y1,x2,y2) to (x,y,w,h)
            R[:, 2] -= R[:, 0]
            R[:, 3] -= R[:, 1]

            # apply the spatial pyramid pooling to the proposed regions
            bboxes = {}
            probs = {}

            for jk in range(R.shape[0] // C.num_rois + 1):
                ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois *
                                        (jk + 1), :],
                                      axis=0)
                if ROIs.shape[1] == 0:
                    print("ROI Shape: ", ROIs.shape[1])
                    break

                if jk == R.shape[0] // C.num_rois:
                    #pad R
                    curr_shape = ROIs.shape
                    target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                    ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                    ROIs_padded[:, :curr_shape[1], :] = ROIs
                    ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                    ROIs = ROIs_padded

                [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

                for ii in range(P_cls.shape[1]):
                    #print("np max:",np.max(P_cls[0, ii, :]))
                    #print("np argmax:",np.argmax(P_cls[0, ii, :]))
                    #print(np.max(P_cls[0, ii, :]) < bbox_threshold)
                    if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                            P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                        continue

                    cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
                    #print('class name:',cls_name)
                    if cls_name not in bboxes:
                        bboxes[cls_name] = []
                        probs[cls_name] = []

                    (x, y, w, h) = ROIs[0, ii, :]

                    cls_num = np.argmax(P_cls[0, ii, :])
                    try:
                        (tx, ty, tw,
                         th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                        tx /= C.classifier_regr_std[0]
                        ty /= C.classifier_regr_std[1]
                        tw /= C.classifier_regr_std[2]
                        th /= C.classifier_regr_std[3]
                        x, y, w, h = roi_helpers.apply_regr(
                            x, y, w, h, tx, ty, tw, th)
                    except:
                        pass
                    bboxes[cls_name].append([
                        C.rpn_stride * x, C.rpn_stride * y,
                        C.rpn_stride * (x + w), C.rpn_stride * (y + h)
                    ])
                    probs[cls_name].append(np.max(P_cls[0, ii, :]))

            all_dets = []

            for key in bboxes:
                bbox = np.array(bboxes[key])

                new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                    bbox,
                    np.array(probs[key]),
                    overlap_thresh=set_Overlap_threshold)
                for jk in range(new_boxes.shape[0]):
                    (x1, y1, x2, y2) = new_boxes[jk, :]

                    (real_x1, real_y1, real_x2,
                     real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                    cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                                  (int(class_to_color[key][0]),
                                   int(class_to_color[key][1]),
                                   int(class_to_color[key][2])), 2)

                    textLabel = f'{key}: {int(100*new_probs[jk])}'
                    all_dets.append((key, 100 * new_probs[jk]))

                    (retval,
                     baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1,
                                                 1)
                    textOrg = (real_x1, real_y1 - 0)

                    cv2.rectangle(img,
                                  (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                                  (textOrg[0] + retval[0] + 5,
                                   textOrg[1] - retval[1] - 5), (0, 0, 0), 2)
                    cv2.rectangle(img,
                                  (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                                  (textOrg[0] + retval[0] + 5,
                                   textOrg[1] - retval[1] - 5),
                                  (255, 255, 255), -1)
                    cv2.putText(img, textLabel, textOrg,
                                cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

            st.write(f'Elapsed time = {time.time() - stTime}')
            st.write(all_dets)
            plt.figure(figsize=(10, 10))
            plt.grid()
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            # plt.show()
            # convert BGR (OpenCV) to RGB before handing the frame to Streamlit
            st.image(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                     use_column_width=True,
                     clamp=True)

            #cv2.imwrite('./results_imgs-fp-mappen-test/{}.png'.format(os.path.splitext(str(img_name))[0]),img)

    from keras_frcnn.simple_parser import get_data
    test_path = 'test_annotationAlt.txt'  # Test data (annotation file)

    startTime = time.time()
    test_imgs, classes_count, class_mapping = get_data(test_path)

    st.write('Spent %0.2f mins loading test data' %
             ((time.time() - startTime) / 60))

    class_mapping = C.class_mapping
    class_mapping = {v: k for k, v in class_mapping.items()}
    st.write(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }

    from sklearn.metrics import average_precision_score
    set_Overlap_threshold = 0.1

    T = {}
    P = {}
    mAPs = []
    iou_map = []

    progress_bar1 = st.progress(0.0)
    with st.spinner('Wait for test set evaluation...'):

        for idx, img_data in enumerate(test_imgs):
            progress_bar1.progress((idx + 0.1) / len(test_imgs))
            st.write('{}/{}'.format(idx, len(test_imgs)))
            startTime = time.time()
            filepath = img_data['filepath']

            img = cv2.imread(filepath)

            X, fx, fy = format_img_map(img, C)
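            # fx, fy are the horizontal/vertical scaling factors, used later by
            # get_map to project ground-truth boxes onto the resized image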

            # Change X (img) shape from (1, channel, height, width) to (1, height, width, channel)
            X = np.transpose(X, (0, 2, 3, 1))

            # get the feature maps and output from the RPN
            [Y1, Y2, F] = model_rpn.predict(X)

            R = rpn_to_roi(Y1,
                           Y2,
                           C,
                           K.image_data_format(),
                           overlap_thresh=0.7)

            # convert from (x1,y1,x2,y2) to (x,y,w,h)
            R[:, 2] -= R[:, 0]
            R[:, 3] -= R[:, 1]

            # apply the spatial pyramid pooling to the proposed regions
            bboxes = {}
            probs = {}

            for jk in range(R.shape[0] // C.num_rois + 1):
                ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois *
                                        (jk + 1), :],
                                      axis=0)
                if ROIs.shape[1] == 0:
                    break

                if jk == R.shape[0] // C.num_rois:
                    # pad R
                    curr_shape = ROIs.shape
                    target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                    ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                    ROIs_padded[:, :curr_shape[1], :] = ROIs
                    ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                    ROIs = ROIs_padded

                [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

                # Calculate all classes' bboxes coordinates on resized image (300, 400)
                # Drop 'bg' classes bboxes
                for ii in range(P_cls.shape[1]):

                    # If class name is 'bg', continue

                    if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                        continue

                    # Get class name
                    cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                    if cls_name not in bboxes:
                        bboxes[cls_name] = []
                        probs[cls_name] = []

                    (x, y, w, h) = ROIs[0, ii, :]

                    cls_num = np.argmax(P_cls[0, ii, :])
                    try:
                        (tx, ty, tw,
                         th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                        tx /= C.classifier_regr_std[0]
                        ty /= C.classifier_regr_std[1]
                        tw /= C.classifier_regr_std[2]
                        th /= C.classifier_regr_std[3]
                        x, y, w, h = roi_helpers.apply_regr(
                            x, y, w, h, tx, ty, tw, th)
                    except:
                        pass
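                    # 16 is the hard-coded feature-map stride here (the same
                    # role C.rpn_stride plays in the other examples)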
                    bboxes[cls_name].append(
                        [16 * x, 16 * y, 16 * (x + w), 16 * (y + h)])
                    probs[cls_name].append(np.max(P_cls[0, ii, :]))

            all_dets = []

            for key in bboxes:
                bbox = np.array(bboxes[key])

                # Apply non-max-suppression on final bboxes to get the output bounding boxes
                new_boxes, new_probs = non_max_suppression_fast(
                    bbox,
                    np.array(probs[key]),
                    overlap_thresh=set_Overlap_threshold)
                for jk in range(new_boxes.shape[0]):
                    (x1, y1, x2, y2) = new_boxes[jk, :]
                    det = {
                        'x1': x1,
                        'x2': x2,
                        'y1': y1,
                        'y2': y2,
                        'class': key,
                        'prob': new_probs[jk]
                    }
                    all_dets.append(det)

            st.write('Elapsed time = {}'.format(time.time() - startTime))
            t, p, iouVal = get_map(all_dets, img_data['bboxes'], (fx, fy))
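            # accumulate per-class match labels (T) and scores (P) across
            # images, then report a running AP per class and the mean over all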
            for key in t.keys():
                if key not in T:
                    T[key] = []
                    P[key] = []
                T[key].extend(t[key])
                P[key].extend(p[key])
            all_aps = []
            for key in T.keys():
                ap = average_precision_score(T[key], P[key])
                st.write('{} AP: {}'.format(key, ap))
                all_aps.append(ap)
            st.write('mAP = {}'.format(np.mean(np.array(all_aps))))
            st.write('iou = {}'.format(np.mean(iouVal)))
            mAPs.append(np.mean(np.array(all_aps)))
            iou_map.append(iouVal)

    st.markdown('## Mean IOU: {}'.format(Average(iou_map)))
    st.markdown('## Mean average precision: {}'.format(np.mean(np.array(mAPs))))
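# Hypothetical usage sketch (assumes a Streamlit app and a pickled training
# config at the given path):
#
#   testModel(config_filename='config_ui.pickle')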
Example #13
def test_frcnn(img_name, config, model):
    from keras.models import Model
    from keras_frcnn.RoiPoolingConv import RoiPoolingConv
    import keras_frcnn.resnet as nn

    config_output_filename = config
    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False
    C.num_rois = 32
    C.model_path = model

    class_mapping = C.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)

    num_features = 1024
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    shared_layers = nn.nn_base(img_input, trainable=True)

    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    # crop
    bbox_threshold = 0.8
    crops = []
    # img_path = path
    # for idx, img_name in enumerate(sorted(os.listdir(img_path))):
    #     if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
    #         continue
    print(img_name)

    st = time.time()
    # filepath = os.path.join(img_path,img_name)
    # img = cv2.imread(filepath)
    img = cv2.imread(img_name)

    X, ratio = format_img(img, C)
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    [Y1, Y2, F] = model_rpn.predict(X)
    R = roi_helpers.rpn_to_roi(Y1,
                               Y2,
                               C,
                               K.image_dim_ordering(),
                               overlap_thresh=0.7)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    bboxes = {}
    probs = {}
    # roifs = {}
    # scenefs = {}

    for jk in range(R.shape[0] // C.num_rois + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                              axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0] // C.num_rois:  #pad R
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        # out_roi_pool = RoiPoolingConv(14, C.num_rois)([feature_map_input, roi_input])
        # haha = Model(inputs=[feature_map_input, roi_input], outputs=out_roi_pool)
        # all_roifs = haha.predict([F, ROIs])
        # [all_scenefs, P_cls, P_regr] = model_classifier_only.predict([F, ROIs
        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):
            if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                    P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []
                # roifs[cls_name] = []
                # scenefs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            cls_num = np.argmax(P_cls[0, ii, :])
            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except:
                pass
            bboxes[cls_name].append([
                C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                C.rpn_stride * (y + h)
            ])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))
            # roifs[cls_name].append(all_roifs[0, ii, :])
            # scenefs[cls_name].append(all_scenefs[0, ii, :])

    all_dets = []
    for key in bboxes:
        bbox = np.array(bboxes[key])
        prob = np.array(probs[key])
        # roif = np.array(roifs[key])
        # scenef = np.array(scenefs[key])

        # new_boxes, new_probs, new_roifs, new_scenefs = roi_helpers.nmsf(bbox, prob, roif, scenef, overlap_thresh=0.5)
        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=0.5)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]
            (real_x1, real_y1, real_x2,
             real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)
            all_dets.append(
                (key, 100 * new_probs[jk], real_x1, real_x2, real_y1, real_y2))

    crops.append([])
    crops[len(crops) - 1].append(img_name)
    for i in range(len(all_dets)):
        cropped = img[all_dets[i][4]:all_dets[i][5],
                      max(0, all_dets[i][2]):all_dets[i][3]]
        # croppath = '/home/comp/e4252392/end2end/crops0317'
        # croppath = '/users/sunjingxuan/pycharmprojects/end2end/crops0317_cpu'
        croppath = '/users/sunjingxuan/desktop/FYP_demo/flask/end2end/crops_demo'
        cropname = os.path.join(
            croppath,
            str(img_name.split(".")[0].split("/")[-1]) + "_cropped" + str(i) +
            ".jpg")
        cv2.imwrite(cropname, cropped)

        # append key, cropname
        crops[len(crops) - 1].append((all_dets[i][0], cropname))

    print('Elapsed time = {}'.format(time.time() - st))
    print(all_dets)
    return crops
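# The returned structure is [[img_name, (class_name, crop_path), ...]]: one
# inner list per call, holding the source image name followed by a saved crop
# file per detection.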
Example #14
# anchor box ratios
anchor_box_ratios = [[1, 1], [1, 2], [2, 1]]

num_rois = 32

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(None, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(anchor_box_scales) * len(anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(shared_layers, roi_input, num_rois, nb_classes=21, trainable=True)

model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)

# this is a model that holds both the RPN and the classifier, used to load/save weights for the models
model_all = Model([img_input, roi_input], rpn[:2] + classifier)


optimizer = Adam(lr=1e-5)
optimizer_classifier = Adam(lr=1e-5)
model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])
model_classifier.compile(optimizer=optimizer_classifier,
                         loss=[losses.class_loss_cls, losses.class_loss_regr(21-1)],
                         metrics={'dense_class_{}'.format(21): 'accuracy'})
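# class_loss_regr takes the number of foreground classes: 21 classes including
# 'bg', hence 21 - 1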
model_all.compile(optimizer='sgd', loss='mae')
Example #15
	input_shape_img = (None, None, 3)
	input_shape_features = (None, None, num_features)


img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn_layers = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)

model_rpn = Model(img_input, rpn_layers)
model_classifier_only = Model([feature_map_input, roi_input], classifier)

model_classifier = Model([feature_map_input, roi_input], classifier)

print('Loading weights from {}'.format(C.model_path))
model_rpn.load_weights(C.model_path, by_name=True)
model_classifier.load_weights(C.model_path, by_name=True)

model_rpn.compile(optimizer='sgd', loss='mse')
model_classifier.compile(optimizer='sgd', loss='mse')

all_imgs = []
Example #16
def Detect_old(movie):
    sys.setrecursionlimit(40000)

    # parser = OptionParser()

    # parser.add_option("-p", "--path", dest="test_path", help="Path to test data.")
    # parser.add_option("-n", "--num_rois", dest="num_rois",
    # 				help="Number of ROIs per iteration. Higher means more memory use.", default=32)
    # parser.add_option("--config_filename", dest="config_filename", help=
    # 				"Location to read the metadata related to the training (generated when training).",
    # 				default="config.pickle")
    # parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')

    # (options, args) = parser.parse_args()

    # # if not options.test_path:   # if filename is not given
    # # 	parser.error('Error: path to test data must be specified. Pass --path to command line')

    # config_output_filename = options.config_filename
    config_output_filename = "config.pickle"

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    #img_path = options.test_path

    def format_img_size(img, C):
        """ formats the image size based on config """
        img_min_side = float(C.im_size)
        (height, width) = img.shape

        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = Image.fromarray(img)
        img = img.resize((new_width, new_height), Image.ANTIALIAS)
        img = np.array(img)

        #img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
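        # normalise the single-channel image (zero mean, unit variance) and add
        # the channel and batch axes the model expects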
        img = img.astype(np.float32)
        img -= np.mean(img.flatten())
        img /= np.std(img.flatten())
        img = img[:, :, np.newaxis]

        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)

        #""" formats the image channels based on config """
        #img = img[:, :, (2, 1, 0)]
        #img = img.astype(np.float32)
        #img[:, :, 0] -= C.img_channel_mean[0]
        #img[:, :, 1] -= C.img_channel_mean[1]
        #img[:, :, 2] -= C.img_channel_mean[2]
        #img /= C.img_scaling_factor
        #img = np.transpose(img, (2, 0, 1))
        #img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):

        # real_x1 = int(round(x1 // ratio))
        # real_y1 = int(round(y1 // ratio))
        # real_x2 = int(round(x2 // ratio))
        # real_y2 = int(round(y2 // ratio))
        real_x1 = x1 / ratio
        real_y1 = y1 / ratio
        real_x2 = x2 / ratio
        real_y2 = y2 / ratio

        return (real_x1, real_y1, real_x2, real_y2)

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    #print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    #C.num_rois = int(options.num_rois)
    C.num_rois = 32

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 1)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs = []

    classes = {}

    bbox_threshold = 0.8

    #visualise = True

    #for idx, img_name in enumerate(sorted(os.listdir(img_path))):
    #	if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
    #		continue

    def AddPosAndRadFromCornerCoords(coords, pnr=None):
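        # convert corner coordinates (x0, y0, x1, y1) into a centre point and a
        # radius, accumulating them into a positions_and_radii container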
        (x0, y0, x1, y1) = coords
        centre = (np.array([x0 + x1, y0 + y1]) / 2)[np.newaxis, :]
        rad = np.array([max(x1 - x0, y1 - y0)]) / 2
        if pnr is None:
            pnr = positions_and_radii(centre, rad)
        else:
            pnr.positions = np.concatenate((pnr.positions, centre))
            pnr.radii = np.concatenate((pnr.radii, rad))

        return pnr

    print("---\n---")

    rois = []
    for idx in range(movie.shape[-1]):
        st = time.time()
        #_, img = data_augment.augment(n=np.random.randint(low=1, high=5))
        img = movie[:, :, idx]

        X, ratio = format_img(img, C)
        X = np.transpose(X, (0, 2, 3, 1))

        # tmp = Image.fromarray((img * 255).astype(np.uint8))
        # img = Image.new('RGBA', tmp.size)
        # img.paste(tmp)
        # del tmp
        # draw = ImageDraw.Draw(img)
        #img = (img * 255).astype(np.uint8)
        #img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

        [Y1, Y2, F] = model_rpn.predict(X)
        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                #pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                        P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except:
                    pass
                bboxes[cls_name].append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        #all_dets = []

        det = None
        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.2)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)
                det = AddPosAndRadFromCornerCoords(
                    (real_x1, real_y1, real_x2, real_y2), det)

                #draw.rectangle(xy=[real_x1, real_y1, real_x2, real_y2], outline='red')
                #cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2)

                #textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
                #all_dets.append((key,100*new_probs[jk]))

                #(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
                #textOrg = (real_x1, real_y1-0)

                #cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
                #cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
                #cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

        if det is None:
            det = positions_and_radii(-1 * np.ones((1, 1)), np.array([-1]))
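            # sentinel entry: -1 position/radius marks a frame with no detections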

        rois.append(det)
        sys.stdout.write("Detecting: {0:.2f}%, Elapsed time/frame = {1:.2f}s".
                         format(100.0 * (idx + 1) / movie.shape[-1],
                                time.time() - st) + '\r')
        sys.stdout.flush()

    print("Detecting: {0:.2f}%, Elapsed time/frame = {1:.2f}s".format(
        100.0 * (idx + 1) / movie.shape[-1],
        time.time() - st) + '\r')
    print("---")
    return rois
    #print('Elapsed time = {}'.format(time.time() - st))
    #print(all_dets)
    #cv2.imshow('img', img)
    #cv2.waitKey(0)
    #cv2.imwrite('./results_imgs/{}.png'.format(idx),img)
    #img.save('./results_imgs/{}.png'.format(idx))
Example #17
def main():
	cleanup()
	sys.setrecursionlimit(40000)
	config_output_filename = 'config.pickle'

	with open(config_output_filename, 'rb') as f_in:
		C = pickle.load(f_in)

	# turn off any data augmentation at test time
	C.use_horizontal_flips = False
	C.use_vertical_flips = False
	C.rot_90 = False
	class_mapping = C.class_mapping

	if 'bg' not in class_mapping:
		class_mapping['bg'] = len(class_mapping)

	class_mapping = {v: k for k, v in class_mapping.items()}
	print(class_mapping)
	class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
	C.num_rois = num_rois

	if K.image_dim_ordering() == 'th':
		input_shape_img = (3, None, None)
		input_shape_features = (1024, None, None)
	else:
		input_shape_img = (None, None, 3)
		input_shape_features = (None, None, 1024)


	img_input = Input(shape=input_shape_img)
	roi_input = Input(shape=(C.num_rois, 4))
	feature_map_input = Input(shape=input_shape_features)

	# define the base network (resnet here, can be VGG, Inception, etc)
	shared_layers = nn.nn_base(img_input, trainable=True)

	# define the RPN, built on the base layers
	num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
	rpn_layers = nn.rpn(shared_layers, num_anchors)

	classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)

	model_rpn = Model(img_input, rpn_layers)
	model_classifier_only = Model([feature_map_input, roi_input], classifier)

	model_classifier = Model([feature_map_input, roi_input], classifier)

	model_rpn.load_weights(C.model_path, by_name=True)
	model_classifier.load_weights(C.model_path, by_name=True)

	model_rpn.compile(optimizer='sgd', loss='mse')
	model_classifier.compile(optimizer='sgd', loss='mse')

	all_imgs = []

	classes = {}

	bbox_threshold = 0.8

	visualise = True

	print("Converting video to images..")
	convert_to_images()
	print("anotating...")

	list_files = sorted(get_file_names(img_path), key=lambda var:[int(x) if x.isdigit() else x for x in re.findall(r'[^0-9]|[0-9]+', var)])
	for img_name in list_files:
		if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
			continue
		print(img_name)
		st = time.time()
		filepath = os.path.join(img_path,img_name)
		img = cv2.imread(filepath)
		X = format_img(img, C)

		img_scaled = np.transpose(X.copy()[0, (2, 1, 0), :, :], (1, 2, 0)).copy()
		img_scaled[:, :, 0] += 123.68
		img_scaled[:, :, 1] += 116.779
		img_scaled[:, :, 2] += 103.939
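		# add back the ImageNet channel means that format_img subtracted, so the
		# frame can be displayed and saved as a normal image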

		img_scaled = img_scaled.astype(np.uint8)

		if K.image_dim_ordering() == 'tf':
			X = np.transpose(X, (0, 2, 3, 1))

		# get the feature maps and output from the RPN
		[Y1, Y2, F] = model_rpn.predict(X)


		R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)

		# convert from (x1,y1,x2,y2) to (x,y,w,h)
		R[:, 2] -= R[:, 0]
		R[:, 3] -= R[:, 1]

		# apply the spatial pyramid pooling to the proposed regions
		bboxes = {}
		probs = {}

		for jk in range(R.shape[0]//C.num_rois + 1):
			ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
			if ROIs.shape[1] == 0:
				break

			if jk == R.shape[0]//C.num_rois:
				#pad R
				curr_shape = ROIs.shape
				target_shape = (curr_shape[0],C.num_rois,curr_shape[2])
				ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
				ROIs_padded[:, :curr_shape[1], :] = ROIs
				ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
				ROIs = ROIs_padded

			[P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

			for ii in range(P_cls.shape[1]):

				if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
					continue

				cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

				if cls_name not in bboxes:
					bboxes[cls_name] = []
					probs[cls_name] = []

				(x, y, w, h) = ROIs[0, ii, :]

				cls_num = np.argmax(P_cls[0, ii, :])
				try:
					(tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
					tx /= C.classifier_regr_std[0]
					ty /= C.classifier_regr_std[1]
					tw /= C.classifier_regr_std[2]
					th /= C.classifier_regr_std[3]
					x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
				except:
					pass
				bboxes[cls_name].append([16*x, 16*y, 16*(x+w), 16*(y+h)])
				probs[cls_name].append(np.max(P_cls[0, ii, :]))

		all_dets = []
		all_objects = []

		for key in bboxes:
			bbox = np.array(bboxes[key])

			new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
			for jk in range(new_boxes.shape[0]):
				(x1, y1, x2, y2) = new_boxes[jk,:]

				cv2.rectangle(img_scaled,(x1, y1), (x2, y2), class_to_color[key],2)

				textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
				all_dets.append((key,100*new_probs[jk]))
				all_objects.append((key, 1))

				(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
				textOrg = (x1, y1-0)

				cv2.rectangle(img_scaled, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
				cv2.rectangle(img_scaled, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
				cv2.putText(img_scaled, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)
		print('Elapsed time = {}'.format(time.time() - st))
		height, width, channels = img_scaled.shape
		cv2.rectangle(img_scaled, (0,0), (width, 30), (0, 0, 0), -1)
		cv2.putText(img_scaled, "Obj count: " + str(list(accumulate(all_objects))), (5, 19), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 255, 255), 1)
		cv2.imwrite(os.path.join(output_path, img_name), img_scaled)
		print(all_dets)
	print("saving to video..")
	save_to_video()
Example #18
    def __init__(self,
                 config_path: str,
                 model_path: str,
                 num_rois: int = 4,
                 bbox_threshold: float = 0.01,
                 use_horizontal_flips: bool = False,
                 use_vertical_flips: bool = False,
                 rot_90: bool = False):
        self.model_path = model_path
        self.bbox_threshold = bbox_threshold

        with open(config_path, 'rb') as f_in:
            self.config = pickle.load(f_in)

        self.config.use_horizontal_flips = use_horizontal_flips
        self.config.use_vertical_flips = use_vertical_flips
        self.config.rot_90 = rot_90

        self.num_rois = num_rois
        self.class_mapping = self.config.class_mapping
        if 'bg' not in self.class_mapping:
            self.class_mapping['bg'] = len(self.class_mapping)
        self.class_mapping = {v: k for k, v in self.class_mapping.items()}
        class_to_color = {
            self.class_mapping[v]: np.random.randint(0, 255, 3)
            for v in self.class_mapping
        }

        num_features = 1024

        if K.image_dim_ordering() == 'th':
            input_shape_img = (3, None, None)
            input_shape_features = (num_features, None, None)
        else:
            input_shape_img = (None, None, 3)
            input_shape_features = (None, None, num_features)

        img_input = Input(shape=input_shape_img)
        roi_input = Input(shape=(self.num_rois, 4))
        feature_map_input = Input(shape=input_shape_features)

        # define the base network (resnet here, can be VGG, Inception, etc)
        shared_layers = nn.nn_base(img_input, trainable=True)

        # define the RPN, built on the base layers
        num_anchors = len(self.config.anchor_box_scales) * len(
            self.config.anchor_box_ratios)
        rpn_layers = nn.rpn(shared_layers, num_anchors)

        classifier = nn.classifier(feature_map_input,
                                   roi_input,
                                   self.num_rois,
                                   nb_classes=len(self.class_mapping),
                                   trainable=True)

        self.model_rpn = Model(img_input, rpn_layers)
        self.model_classifier_only = Model([feature_map_input, roi_input],
                                           classifier)

        self.model_classifier = Model([feature_map_input, roi_input],
                                      classifier)

        # LOAD WEIGHTS
        self.model_rpn.load_weights(self.model_path, by_name=True)
        self.model_classifier.load_weights(self.model_path, by_name=True)

        self.model_rpn.compile(optimizer='sgd', loss='mse')
        self.model_classifier.compile(optimizer='sgd', loss='mse')
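        # note: the 'mse' loss and 'sgd' optimiser appear to be placeholders;
        # these models are only used for inference in this wrapper, so the
        # choice does not affect predictions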
Example #19
	input_shape_img = (None, None, 3)
	input_shape_features = (None, None, num_features)


img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn_layers = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)

model_rpn = Model(img_input, rpn_layers)
model_classifier_only = Model([feature_map_input, roi_input], classifier)

model_classifier = Model([feature_map_input, roi_input], classifier)

print('Loading weights from {}'.format(C.model_path))
model_rpn.load_weights(C.model_path, by_name=True)
model_classifier.load_weights(C.model_path, by_name=True)

model_rpn.compile(optimizer='sgd', loss='mse')
model_classifier.compile(optimizer='sgd', loss='mse')

all_imgs = []
def tf_fit_img(img, C,filename):
	K.clear_session()
	if C.network == 'resnet50':
		import keras_frcnn.resnet as nn
	elif C.network == 'vgg':
		import keras_frcnn.vgg as nn
	if C.network == 'resnet50':
		num_features = 1024
	elif C.network == 'vgg':
		num_features = 512
	class_mapping = C.class_mapping

	if 'bg' not in class_mapping:
		class_mapping['bg'] = len(class_mapping)

	class_mapping = {v: k for k, v in class_mapping.items()}
	print(class_mapping)
	class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
	if K.image_dim_ordering() == 'th':
		input_shape_img = (3, None, None)
		input_shape_features = (num_features, None, None)
	else:
		input_shape_img = (None, None, 3)
		input_shape_features = (None, None, num_features)

	img_input = Input(shape=input_shape_img)
	roi_input = Input(shape=(C.num_rois, 4))
	feature_map_input = Input(shape=input_shape_features)

	# define the base network (resnet here, can be VGG, Inception, etc)
	shared_layers = nn.nn_base(img_input, trainable=True)

	# define the RPN, built on the base layers
	num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
	rpn_layers = nn.rpn(shared_layers, num_anchors)

	classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)

	model_rpn = Model(img_input, rpn_layers)
	model_classifier_only = Model([feature_map_input, roi_input], classifier)

	model_classifier = Model([feature_map_input, roi_input], classifier)

	print('Loading weights from {}'.format(C.model_path))
	model_rpn.load_weights(C.model_path, by_name=True)
	model_classifier.load_weights(C.model_path, by_name=True)

	model_rpn.compile(optimizer='sgd', loss='mse')
	model_classifier.compile(optimizer='sgd', loss='mse')

	bbox_threshold = 0.8
	X, ratio = format_img(img, C)

	if K.image_dim_ordering() == 'tf':
		X = np.transpose(X, (0, 2, 3, 1))

	# get the feature maps and output from the RPN
	# print(X)
	[Y1, Y2, F] = model_rpn.predict(X)


	R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)

	# convert from (x1,y1,x2,y2) to (x,y,w,h)
	R[:, 2] -= R[:, 0]
	R[:, 3] -= R[:, 1]

	# apply the spatial pyramid pooling to the proposed regions
	bboxes = {}
	probs = {}

	for jk in range(R.shape[0]//C.num_rois + 1):
		ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
		if ROIs.shape[1] == 0:
			break

		if jk == R.shape[0]//C.num_rois:
			#pad R
			curr_shape = ROIs.shape
			target_shape = (curr_shape[0],C.num_rois,curr_shape[2])
			ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
			ROIs_padded[:, :curr_shape[1], :] = ROIs
			ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
			ROIs = ROIs_padded

		[P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

		for ii in range(P_cls.shape[1]):

			if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
				continue

			cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

			if cls_name not in bboxes:
				bboxes[cls_name] = []
				probs[cls_name] = []

			(x, y, w, h) = ROIs[0, ii, :]

			cls_num = np.argmax(P_cls[0, ii, :])
			try:
				(tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
				tx /= C.classifier_regr_std[0]
				ty /= C.classifier_regr_std[1]
				tw /= C.classifier_regr_std[2]
				th /= C.classifier_regr_std[3]
				x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
			except:
				pass
			bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)])
			probs[cls_name].append(np.max(P_cls[0, ii, :]))

	all_dets = []
	detect_imgs = []

	for key in bboxes:
		bbox = np.array(bboxes[key])
		count = 0
		newPic_name = "box_{}.jpg".format(filename[:-4] + str(count))
		count += 1
		detect_imgs.append(newPic_name)
		original_img = cv2.imread('./static/tmp_pic/'+filename)
		height, width, _ = original_img.shape
		(resized_width, resized_height) = get_new_img_size(width, height, 300)
		resize_img = cv2.resize(original_img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC)
		cv2.imwrite("./static/img/doc/" + filename, resize_img)
		new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
		for jk in range(new_boxes.shape[0]):
			(x1, y1, x2, y2) = new_boxes[jk,:]

			(real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)
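			# project the recovered original-image coordinates onto the
			# 300px-resized copy that the boxes are drawn on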
			gt_x1, gt_x2 = real_x1 * (resized_width/width), real_x2 * (resized_width/width)
			gt_y1, gt_y2 = real_y1 * (resized_height/height), real_y2 * (resized_height/height)
			gt_x1, gt_y1, gt_x2, gt_y2 = int(gt_x1), int(gt_y1), int(gt_x2), int(gt_y2)
			color = (0, 255, 0)
			result_img = cv2.rectangle(resize_img, (gt_x1, gt_y1), (gt_x2, gt_y2), color, 2)

			cv2.imwrite("./static/img/doc/" + newPic_name, result_img)
			textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
			all_dets.append((key,100*new_probs[jk]))

			(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
			textOrg = (real_x1, real_y1-0)

			cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
			cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
			cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

	# print('Elapsed time = {}'.format(time.time() - st))
	print(all_dets)
	if len(detect_imgs) > 0:
		print(detect_imgs[0])
		return detect_imgs[0], all_dets
	else:
		return "Can not detect"