Example #1
    def get_detection_output(self, nms_th=0.45, cls_th=0.6):

        output = dict()
        ps = [[] for _ in self.input_names]

        prior = self.net.mbox_prior.astype(np.float32)
        loc = self.net.mbox_loc.data[0]
        # softmax-normalized class confidences, reshaped per prior box
        conf = self.net.mbox_conf_softmax_reahpe.data[0]

        cand = []
        loc = ssd.decoder(loc, prior)
        for label in range(1, 21):
            # Pre-filtering by score is disabled here; the cls_th check
            # below filters detections after NMS instead.
            scores = conf[:, label]
            cand_loc = loc
            k = bbox.nms(cand_loc, scores, nms_th, 300)
            for i in k:
                if scores[i] > cls_th:
                    cand.append(np.hstack([[label], [scores[i]], cand_loc[i]]))
                    ps[0].append(
                        (float(scores[i]), int(label), float(cand_loc[i][0]),
                         float(cand_loc[i][1]), float(cand_loc[i][2]),
                         float(cand_loc[i][3])))
        output[self.input_names[0]] = ps[0]

        return output
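
ssd.decoder itself is not shown in these snippets. For reference, a common SSD decoding convention turns the regressed offsets back into corner boxes like this (a minimal NumPy sketch, assuming center-size priors (cx, cy, w, h) and the usual (0.1, 0.2) variances; the real ssd.decoder may differ):

import numpy as np

def decode_boxes(loc, priors, variances=(0.1, 0.2)):
    # decode_boxes is a hypothetical stand-in for ssd.decoder:
    # priors are (cx, cy, w, h); loc holds the regressed offsets.
    cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
    # convert center-size form back to (x1, y1, x2, y2) corners
    return np.concatenate([cxcy - wh / 2, cxcy + wh / 2], axis=1)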
Example #2
def detect_faces(fnames, model, pre_thresh, resize=512):
    results, faces = [], []
    for fname in tqdm(fnames, desc='detecting faces'):
        try:
            img = cv2.imread(fname)
            height, width, _ = img.shape
            ratio = resize / height
            # cv2.resize expects (width, height); scale width by the same ratio
            img = cv2.resize(img, (int(ratio * width), resize))
        except Exception as e:
            print('{} @{}'.format(e, fname))
            continue  # skip unreadable images

        bboxlist = detect(model, img)

        keep = nms(bboxlist, 0.3)
        bboxlist = bboxlist[keep, :]
        keep = bboxlist[:, 4] > pre_thresh
        bboxlist = bboxlist[keep, :]
        bboxlist[:, :4] = bboxlist[:, :4] / ratio

        results.append(Face(fname, bboxlist, None))
        for b in bboxlist:
            # x1, y1, x2, y2, s = b
            face = Face(fname, b, b[4])
            faces.append(face)
    return results, faces
Example #3
def detect(prior, loc, conf, nms_th=0.45, cls_th=0.6):
    """Decode SSD outputs, filter by class score, and run per-class NMS."""
    cand = []
    loc = ssd.decoder(loc, prior)
    for label in range(1, 21):
        cand_score = np.where(conf[:, label] > cls_th)
        scores = conf[:, label][cand_score]
        cand_loc = loc[cand_score]
        k = bbox.nms(cand_loc, scores, nms_th, 300)
        for i in k:
            cand.append(np.hstack([[label], [scores[i]], cand_loc[i]]))
    cand = np.array(cand)
    return cand
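
Every example in this section delegates suppression to an external nms/bbox.nms helper. Greedy NMS itself is short; here is a self-contained NumPy sketch (an illustration, not the exact helper used above), assuming corner boxes (x1, y1, x2, y2) and a top_k cap mirroring the 300 passed to bbox.nms:

import numpy as np

def nms_reference(boxes, scores, iou_th=0.45, top_k=300):
    # Greedy NMS: repeatedly keep the best-scoring box and drop
    # every remaining box that overlaps it by more than iou_th.
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1][:top_k]  # candidate indices, best score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of box i with all remaining candidates
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= iou_th]
    return keep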
Example #4
def detect_faces(net: nn.Module,
                 img: np.ndarray,
                 minscale: int = 3,
                 ovr_threshold: float = 0.3,
                 score_threshold: float = 0.5) -> List[Tuple]:
    """Returns a list of (x1, y1, x2, y2, score) bounding-box tuples.
    Setting minscale to 0 finds the smallest faces, but takes the longest.
    """
    bboxlist = detect(net, img, minscale)
    keep_idx = nms(bboxlist, ovr_threshold)
    bboxlist = bboxlist[keep_idx, :]
    out = []
    for b in bboxlist:
        x1, y1, x2, y2, s = b
        if s < score_threshold:
            continue
        out.append((int(x1), int(y1), int(x2), int(y2), s))
    return out
Example #5
def detect_faces(fnames, model, pre_thresh=0.3, resize=512):
    results = []
    for fname in tqdm(fnames, desc='detecting faces'):
        try:
            img = cv2.imread(fname)
            height, width, _ = img.shape
            ratio = resize / height
            if ratio < 1:  # only shrink large images, never upscale
                # cv2.resize expects (width, height)
                img = cv2.resize(img, (int(ratio * width), resize))
        except Exception as e:
            print('{} @{}'.format(e, fname))
            continue  # skip unreadable images

        bboxlist = detect(model, img)

        keep = nms(bboxlist, 0.3)
        bboxlist = bboxlist[keep, :]
        keep = bboxlist[:, 4] > pre_thresh
        bboxlist = bboxlist[keep, :]
        if ratio < 1:  # map boxes back to original-image coordinates
            bboxlist[:, :4] = bboxlist[:, :4] / ratio

        results.append({"fname": os.path.basename(fname), "bboxes": bboxlist})

    return results
Example #6
def inference(args):
    model_file = osp.join(
        args.output_dir, args.ex_dir,
        'epoch_{:d}'.format(args.epoch_num) + '.ckpt')
    if not os.path.exists(os.path.join(args.output_dir, args.ex_dir)):
        os.makedirs(os.path.join(args.output_dir, args.ex_dir))
    if not os.path.exists(os.path.join(args.output_dir, args.ex_dir, 'eval')):
        os.makedirs(os.path.join(args.output_dir, args.ex_dir, 'eval'))

    colors = [l.color for l in label_defs[args.test_set]]
    AP = APCalculate(args.iou_threshold, args.test_set)

    if args.batch_size % args.num_gpus != 0:
        print('batch_size must be evenly divisible by num_gpus')
        exit()
    args.batch_size = args.batch_size//args.num_gpus

    with tf.device('/cpu:0'):
        dataset = Dataset(os.path.dirname(__file__), args.batch_size, 'test', args.num_gpus)
        num_valid = dataset.valid_train
        args.num_cl = dataset.num_classes
        net = SSD(args, args.backbone_dir)
        for i in range(args.num_gpus):
            with tf.device('gpu:' + str(i)):
                with tf.variable_scope(tf.get_variable_scope(), reuse=(i > 0)):
                    net.create_network(i * args.batch_size, (i + 1) * args.batch_size)
                    saver = tf.train.Saver()

                # if i == 0:
                # Evaluate model (with test logits, for dropout to be disabled)
                #    correct_pred = tf.equal(tf.argmax(logits_test, 1), tf.argmax(_y, 1))
                #    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        total_batch = num_valid // (args.batch_size * args.num_gpus)

        tfconfig = tf.ConfigProto(allow_soft_placement=True)  # fall back to another device if the requested one is unavailable
        tfconfig.gpu_options.allow_growth = True  # allocate GPU memory on demand instead of all at once
        # tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.4  # fraction of GPU memory to use, 0~1

        sess = tf.Session(config=tfconfig)
        # Initialize the variables (i.e. assign their default value)

        init = tf.global_variables_initializer()
        sess.run(init)

        iterator = dataset.get_iterator()
        next_element = iterator.get_next()

        saver.restore(sess, model_file)
        sess.run(iterator.initializer)
        end_flag = 0

        for iter in tqdm(range(total_batch + 1), desc='# ' + args.test_set + ' evaluation processing'):
            # e.g. batch_size 32 with num_gpus 2 fetches 32 * 2 = 64 samples per step
            # TODO: get_batch
            test_batch = dict()
            output_data = sess.run(next_element)
            if iter == 0:
                input_first = output_data
            if iter == total_batch:
                if num_valid % (args.batch_size * args.num_gpus) == 0:
                    break
                else:
                    test_batch['image'] = np.append(
                        output_data[0],
                        input_first[0][:args.batch_size * args.num_gpus
                                       - num_valid % (args.batch_size * args.num_gpus)],
                        axis=0)  # pad the short final batch with samples from the first batch
            if iter < total_batch:
                test_batch['image'] = output_data[0]  # 0~255

            feed_dict = {net.image: test_batch['image']}
            run_list = [tf.get_collection('bbox_pred1'), tf.get_collection('bbox_pred2'),
                        tf.get_collection('bbox_pred3'), tf.get_collection('bbox_pred4'),
                        tf.get_collection('bbox_pred5'), tf.get_collection('bbox_pred6')]
            pred1, pred2, pred3, pred4, pred5, pred6 = sess.run(run_list, feed_dict=feed_dict)

            for j in range(args.num_gpus):
                for l in range(args.batch_size):
                    pred = [pred1[j][l], pred2[j][l], pred3[j][l], pred4[j][l], pred5[j][l], pred6[j][l]]
                    bbox_list = list()

                    for k, prediction in enumerate(pred):
                        bbox_list += bbox.translate_pred_to_bbox(prediction, (args.img_h, args.img_w), args.num_cl, net.pred_infos[k])

                    bbox_list.sort(reverse=True, key=lambda box: box.conf)

                    top_k = 200
                    bbox_list = bbox_list[:top_k]  # slicing already copes with shorter lists

                    bbox_list2 = defaultdict(list)
                    for box in bbox_list:
                        bbox_list2[box.cl].append(box)

                    bbox_list_nms = list()
                    for k in range(args.num_cl):
                        bbox_list_nms += bbox.nms(bbox_list2[k], args.nms_threshold)

                    img = test_batch['image'][j*args.batch_size+l].astype(np.uint8)
                    test_box_img = np.copy(img)
                    for box in bbox_list_nms:
                        bbox.draw_box(test_box_img, box, label_defs[args.test_set][box.cl].name, colors[box.cl])
                    test_box_img = cv2.cvtColor(test_box_img, cv2.COLOR_BGR2RGB)
                    if args.show_img:
                        cv2.imshow('box_img', test_box_img)
                        cv2.waitKey(0)

                    gt_fname = output_data[1][j * args.batch_size + l]
                    gt_id = gt_fname[-10:-4].decode("utf-8")  # image id parsed from the filename
                    gt_label = output_data[2][j * args.batch_size + l]
                    gt_boxes_cord = output_data[4][j * args.batch_size + l]  # gt labels and box coordinates, padded to 100 entries

                    AP.seperate_class(gt_boxes_cord, gt_label, gt_id, bbox_list_nms)
                    if args.save_img:
                        cv2.imwrite(os.path.join(args.output_dir, args.ex_dir, 'eval', gt_id + '.png'), test_box_img)
                    if iter == total_batch and j * args.batch_size + l + 1 == num_valid % (args.batch_size * args.num_gpus):
                        end_flag = 1
                        break
                if end_flag == 1:
                    break

        AP.compute_ap()
        print('\n------ VOC07 metric ------')
        for i in range(args.num_cl):
            print(label_defs[args.test_set][i].name, "AP : {:.2f}".format(AP.AP[i]))

        print('mAP : ', AP.APs)
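
The last-batch handling above (padding a short final batch with samples recycled from the first batch, then breaking once the real samples are consumed) is easier to see in isolation. A minimal sketch with a hypothetical helper name:

import numpy as np

def pad_last_batch(last, first, full_size, num_total):
    # pad_last_batch is a hypothetical helper mirroring the np.append
    # logic above: top up a short final batch with rows recycled from
    # the first batch so the graph always sees full_size samples.
    shortfall = full_size - (num_total % full_size)
    return np.append(last, first[:shortfall], axis=0)

# e.g. 1000 samples at batch 64: the last batch has 40 rows and borrows 24.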
Example #7
def train(args):
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    train_log_dir = os.path.join(args.log_dir, 'train')
    valid_log_dir = os.path.join(args.log_dir, 'valid')
    if not os.path.exists(train_log_dir):
        os.makedirs(train_log_dir)
    if not os.path.exists(valid_log_dir):
        os.makedirs(valid_log_dir)
    if not os.path.exists(os.path.join(args.output_dir, args.ex_dir)):
        os.makedirs(os.path.join(args.output_dir, args.ex_dir))
    if not os.path.exists(os.path.join(args.output_dir, args.ex_dir, 'image')):
        os.makedirs(os.path.join(args.output_dir, args.ex_dir, 'image'))
    train_log_writer = SummaryWriter(train_log_dir)
    # valid_log_writer = SummaryWriter(valid_log_dir)

    if args.batch_size % args.num_gpus != 0:
        print('batch_size must be evenly divisible by num_gpus')
        exit()
    args.batch_size = args.batch_size // args.num_gpus

    with tf.device('/cpu:0'):
        tower_grads = []
        loss_tmp = []
        cls_loss_tmp = []
        loc_loss_tmp = []
        regul_loss_tmp = []

        learning_rate = tf.placeholder(tf.float32, shape=[])
        optim = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        dataset = Dataset(os.path.dirname(__file__), args.batch_size, 'train',
                          args.num_gpus)
        num_trains = dataset.num_train
        args.num_cl = dataset.num_classes
        net = SSD(args, args.backbone_dir)
        for i in range(args.num_gpus):
            with tf.device('gpu:' + str(i)):
                with tf.variable_scope(tf.get_variable_scope(), reuse=(i > 0)):
                    net.create_network(i * args.batch_size,
                                       (i + 1) * args.batch_size)
                    cls_loss, loc_loss, regul_loss, total_loss = net.create_basic_loss(
                        i * args.batch_size, (i + 1) * args.batch_size)

                    loss_tmp.append(total_loss)
                    loc_loss_tmp.append(loc_loss)
                    cls_loss_tmp.append(cls_loss)
                    regul_loss_tmp.append(regul_loss)
                    # train_op = optim.minimize(total_loss)

                    grads = optim.compute_gradients(
                        total_loss
                    )  # first half of minimize(): compute the gradients
                    tower_grads.append(grads)

        total_batch = num_trains // (args.batch_size * args.num_gpus)
        tower_grads = average_gradients(tower_grads)
        train_op = optim.apply_gradients(
            tower_grads
        )  # second half of minimize(): update each parameter by its gradient scaled by the learning rate

        total_losss = tf.reduce_mean(loss_tmp)
        global_step = tf.Variable(0, trainable=False)
        #train_op = optim.minimize(total_loss, global_step, colocate_gradients_with_ops=True)
        cls_losss = tf.reduce_mean(cls_loss_tmp)
        loc_losss = tf.reduce_mean(loc_loss_tmp)
        regul_losss = tf.reduce_mean(
            regul_loss_tmp
        )  # same as tf.add_n(tf.get_collection('regul_loss'))
        tfconfig = tf.ConfigProto(
            allow_soft_placement=True)  # fall back to another device if the requested one is unavailable
        tfconfig.gpu_options.allow_growth = True  # allocate GPU memory on demand instead of all at once
        # tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.4  # fraction of GPU memory to use, 0~1

        sess = tf.Session(config=tfconfig)
        # Initialize the variables (i.e. assign their default value)
        init = tf.global_variables_initializer()

        sess.run(init)
        global_iter = 0
        lr = args.lr

        iterator = dataset.get_iterator()
        next_element = iterator.get_next()
        for epoch in range(args.max_epoch):
            sess.run(iterator.initializer)
            for iter in range(
                    total_batch + 1
            ):  # e.g. total_image = 10000 with batch 10 -> total_batch = 1000
                # e.g. batch_size 32 with num_gpus 2 fetches 32 * 2 = 64 samples per step
                # print(objgraph.show_growth())
                # TODO: get_batch
                input_data = sess.run(next_element)
                train_batch = dict()
                if iter == 0:
                    input_first = input_data
                if iter == total_batch:
                    if num_trains % (args.batch_size * args.num_gpus) == 0:
                        break
                    else:
                        train_batch['image'] = np.append(
                            input_data[0],
                            input_first[0][:args.batch_size * args.num_gpus -
                                           num_trains %
                                           (args.batch_size * args.num_gpus)],
                            axis=0)
                        #train_list = np.append(input_data[2], input_first[2][:args.batch_size-num_trains % (args.batch_size * args.num_gpus)], axis=0)
                        imsis = np.append(
                            input_data[1],
                            input_first[1][:args.batch_size * args.num_gpus -
                                           num_trains %
                                           (args.batch_size * args.num_gpus)],
                            axis=0)
                        train_batch['gt_cl'] = imsis[:, :, :args.num_cl + 1]
                        train_batch['gt_loc'] = imsis[:, :, args.num_cl + 1:]

                if iter < total_batch:
                    train_batch['image'] = input_data[0]  # 0~255
                    train_batch['gt_cl'] = input_data[1][:, :, :args.num_cl +
                                                         1]  # (32, 8732, 9)
                    train_batch['gt_loc'] = input_data[1][:, :, args.num_cl +
                                                          1:]  # (32, 8732, 4)
                    train_list = input_data[2]

                #print(train_list)
                feed_dict = {
                    learning_rate: lr,
                    net.image: train_batch['image'],
                    net.gt_loc: train_batch['gt_loc'],
                    net.gt_cl: train_batch['gt_cl']
                }
                run_list = [
                    cls_losss, loc_losss, regul_losss, total_losss, train_op
                ]  # the losses we want to log, plus the gradient update op (train_op)
                all_cls_loss, all_loc_loss, all_regul_loss, all_total_loss, _ = sess.run(
                    run_list, feed_dict=feed_dict)

                print("Epoch %d/%d, Iter: %d/%d" %
                      (epoch, args.max_epoch, iter, total_batch + 1))
                print("\tClass loss: %f\n\tLoc loss: %f\n\tRegul loss: %f" %
                      (all_cls_loss, all_loc_loss, all_regul_loss))
                print("\tTotal Loss : %f" % (all_total_loss))
                train_log_writer.add_scalar('cl_loss', all_cls_loss,
                                            global_iter)
                train_log_writer.add_scalar('loc_loss', all_loc_loss,
                                            global_iter)
                train_log_writer.add_scalar('total_loss', all_total_loss,
                                            global_iter)

                if global_iter == 40000:
                    lr *= 0.1
                if global_iter == 50000:
                    lr *= 0.1
                global_iter += 1

                if iter == total_batch and epoch % 10 == 0 and args.save_img:
                    feed_dict = {net.image: train_batch['image']}
                    run_list = [
                        tf.get_collection('bbox_pred1'),
                        tf.get_collection('bbox_pred2'),
                        tf.get_collection('bbox_pred3'),
                        tf.get_collection('bbox_pred4'),
                        tf.get_collection('bbox_pred5'),
                        tf.get_collection('bbox_pred6')
                    ]
                    pred1, pred2, pred3, pred4, pred5, pred6 = sess.run(
                        run_list, feed_dict=feed_dict)
                    for j in range(args.num_gpus):
                        for l in range(args.batch_size):
                            pred = [
                                pred1[j][l], pred2[j][l], pred3[j][l],
                                pred4[j][l], pred5[j][l], pred6[j][l]
                            ]

                            bbox_list = list()
                            for k, prediction in enumerate(pred):
                                bbox_list += bbox.translate_pred_to_bbox(
                                    prediction, (args.img_w, args.img_h),
                                    args.num_cl, net.pred_infos[k])
                            bbox_list.sort(reverse=True,
                                           key=lambda box: box.conf)

                            top_k = 200
                            bbox_list = bbox_list[:top_k]  # slicing already copes with shorter lists

                            bbox_list2 = defaultdict(list)
                            for box in bbox_list:
                                bbox_list2[box.cl].append(box)

                            bbox_list_nms = list()
                            for k in range(args.num_cl):
                                bbox_list_nms += bbox.nms(bbox_list2[k], 0.45)

                            img = train_batch['image'][j * args.batch_size +
                                                       l].astype(np.uint8)
                            train_box_img = np.copy(img)

                            for box in bbox_list_nms:
                                bbox.draw_box(
                                    train_box_img, box,
                                    inference.label_defs[args.train_set][box.cl].name,
                                    inference.label_defs[args.train_set][box.cl].color)
                            train_box_img = cv2.cvtColor(
                                train_box_img, cv2.COLOR_BGR2RGB)
                            train_save_path = os.path.join(
                                args.output_dir, args.ex_dir, 'image',
                                'epoch_{:d}img_batch_{:d}.png'.format(
                                    epoch, j * args.batch_size + l))
                            cv2.imwrite(train_save_path, train_box_img)
                            if args.show_img:
                                cv2.imshow('train_box_img', train_box_img)
                                cv2.waitKey(0)

            if epoch % 20 == 0 and epoch != 0:
                print('save detection_result........................')
                saver = tf.train.Saver()
                snapshot(sess, saver, epoch, args.ex_dir)
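
average_gradients is called above but not shown. In the canonical TF1 multi-tower pattern it averages each variable's gradient across the per-GPU compute_gradients results; a sketch under that assumption:

import tensorflow as tf

def average_gradients(tower_grads):
    # tower_grads: one [(grad, var), ...] list per GPU tower.
    # Variables are shared across towers, so average only the grads.
    averaged = []
    for grads_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grads_and_vars]
        mean_grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        averaged.append((mean_grad, grads_and_vars[0][1]))
    return averaged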
Example #8
net.cuda()
net.eval()  # set dropout and batch normalization layers to evaluation mode

if args.path == 'CAMERA': cap = cv2.VideoCapture(0)
# cap.set(cv2.CAP_PROP_FRAME_WIDTH, 160)
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 120)

cv2.namedWindow('image', cv2.WINDOW_NORMAL)
while True:
    if args.path == 'CAMERA': ret, img = cap.read()
    else: img = cv2.imread(args.path)

    t1 = time.time()
    bboxlist = detect(net, img)
    t2 = time.time()
    print("time: {}".format(t2 - t1))

    keep = nms(bboxlist, 0.3)
    bboxlist = bboxlist[keep, :]
    for b in bboxlist:
        x1, y1, x2, y2, s = b
        if s < 0.5: continue
        cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0),
                      1)
    cv2.imshow('image', img)

    if args.path == 'CAMERA':
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    else:
        cv2.imwrite(args.path[:-4] + '_output.png', img)
        cv2.waitKey(0)
        break
Example #9
    def __init__(self, inputs):
        """
        Region proposal net - inputs should be a list of [convolution model, tuple(image_h, image_w, image_scale)]
        """
        self.conv_in, self.im_info = inputs
        ## inputs is a convolutional net (e.g. VGG or ZFNet) before the fully-connected layers.
        super(RPN, self).__init__(inputs)
        in_filters = self.conv_in.output_size[1] # 512
        # RPN conv layers
        classes = 2
        n_anchors = 9
        min_size = 16
        anchor_size = 16
        nms_thresh = 0.7
        topN = 2000

        self.conv = Conv2D(inputs=self.conv_in,
                           n_filters=in_filters, filter_size=(3, 3), stride=(1, 1), activation='relu', border_mode='full')

        self.cls_score = Conv2D(inputs=self.conv,
                                n_filters=classes*n_anchors, filter_size=(1, 1), stride=(1, 1), activation='linear', border_mode='valid')

        # need to dimshuffle/flatten it down to get the softmax class probabilities for each class of `classes`
        cls_shape = self.cls_score.get_outputs().shape
        cls_score = self.cls_score.get_outputs().reshape((cls_shape[0], classes, -1, cls_shape[3]))
        # shuffle to (classes, batch, row, col)
        cls_shuffle = cls_score.dimshuffle((1, 0, 2, 3))
        # flatten to (classes, batch*row*col)
        cls_flat = cls_shuffle.flatten(2)
        # shuffle to (batch*row*col, classes)
        cls_flat = cls_flat.dimshuffle((1, 0))
        # softmax for probability!
        cls_probs_flat = T.nnet.softmax(cls_flat)
        # now shuffle back up to 4D output from cls_score (undo what we did)
        cls_probs = cls_probs_flat.dimshuffle((1, 0)).reshape(cls_shuffle.shape)
        cls_probs = cls_probs.dimshuffle((1, 0, 2, 3))
        self.cls_probs = cls_probs.reshape(cls_shape)

        self.bbox_pred = Conv2D(inputs=self.conv,
                                n_filters=4*n_anchors, filter_size=(1, 1), stride=(1, 1), activation='linear', border_mode='valid')


        ###############
        #  1. Generate proposals from bbox deltas and shifted anchors (ROIs)
        ###############
        anchors = theano.shared(generate_anchors(anchor_size))
        # the last n_anchors channels hold the foreground (object) probabilities
        object_probs = self.cls_probs[:, n_anchors:, :, :]
        bbox_deltas = self.bbox_pred.get_outputs()
        # height and width of convolution features
        H, W = object_probs.shape[-2:]
        # essentially do numpy's meshgrid by tiling anchors across height and width of convolution features
        shift_x = (T.arange(0, W) * anchor_size).reshape((1, W))
        shift_y = (T.arange(0, H) * anchor_size).reshape((1, H))
        shift_x = T.tile(shift_x, (H, 1))
        shift_y = T.tile(shift_y.T, (1, W))
        shifts = T.stack([shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel()]).T
        # Enumerate all shifted anchors:
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = n_anchors
        K = shifts.shape[0]
        anchors = anchors.reshape((1, A, 4)) + shifts.reshape((K, 1, 4))
        anchors = anchors.reshape((K*A, 4))
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.dimshuffle((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the object scores:
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = object_probs.dimshuffle((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, self.im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = filter_boxes(proposals, min_size * self.im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        order = scores.ravel().argsort()[::-1]

        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 2000)
        # 8. return the top proposals (-> RoIs top)
        keep, self.updates = nms(T.concatenate([proposals, scores], axis=1), nms_thresh)
        keep = keep[:topN]
        self.proposals = proposals[keep, :]
        self.scores = scores[keep]

        self.outputs = [self.proposals, self.scores]
        # self.output_size = [self.cls_score.output_size, self.bbox_pred.output_size]

        self.params = {}
        self.params.update(p_dict("rpn_conv/3x3_", self.conv))
        self.params.update(p_dict("rpn_cls_score_", self.cls_score))
        self.params.update(p_dict("rpn_bbox_pred_", self.bbox_pred))
Example #10
    def __init__(self, inputs):
        """
        Region proposal net - inputs should be a list of [convolution model, tuple(image_h, image_w, image_scale)]
        """
        self.conv_in, self.im_info = inputs
        ## inputs is a convolutional net (e.g. VGG or ZFNet) before the fully-connected layers.
        super(RPN, self).__init__(inputs)
        in_filters = self.conv_in.output_size[1]  # 512
        # RPN conv layers
        classes = 2
        n_anchors = 9
        min_size = 16
        anchor_size = 16
        nms_thresh = 0.7
        topN = 2000

        self.conv = Conv2D(inputs=self.conv_in,
                           n_filters=in_filters,
                           filter_size=(3, 3),
                           stride=(1, 1),
                           activation='relu',
                           border_mode='full')

        self.cls_score = Conv2D(inputs=self.conv,
                                n_filters=classes * n_anchors,
                                filter_size=(1, 1),
                                stride=(1, 1),
                                activation='linear',
                                border_mode='valid')

        # need to dimshuffle/flatten it down to get the softmax class probabilities for each class of `classes`
        cls_shape = self.cls_score.get_outputs().shape
        cls_score = self.cls_score.get_outputs().reshape(
            (cls_shape[0], classes, -1, cls_shape[3]))
        # shuffle to (classes, batch, row, col)
        cls_shuffle = cls_score.dimshuffle((1, 0, 2, 3))
        # flatten to (classes, batch*row*col)
        cls_flat = cls_shuffle.flatten(2)
        # shuffle to (batch*row*col, classes)
        cls_flat = cls_flat.dimshuffle((1, 0))
        # softmax for probability!
        cls_probs_flat = T.nnet.softmax(cls_flat)
        # now shuffle back up to 4D output from cls_score (undo what we did)
        cls_probs = cls_probs_flat.dimshuffle(
            (1, 0)).reshape(cls_shuffle.shape)
        cls_probs = cls_probs.dimshuffle((1, 0, 2, 3))
        self.cls_probs = cls_probs.reshape(cls_shape)

        self.bbox_pred = Conv2D(inputs=self.conv,
                                n_filters=4 * n_anchors,
                                filter_size=(1, 1),
                                stride=(1, 1),
                                activation='linear',
                                border_mode='valid')

        ###############
        #  1. Generate proposals from bbox deltas and shifted anchors (ROIs)
        ###############
        anchors = theano.shared(generate_anchors(anchor_size))
        # the last n_anchors channels hold the foreground (object) probabilities
        object_probs = self.cls_probs[:, n_anchors:, :, :]
        bbox_deltas = self.bbox_pred.get_outputs()
        # height and width of convolution features
        H, W = object_probs.shape[-2:]
        # essentially do numpy's meshgrid by tiling anchors across height and width of convolution features
        shift_x = (T.arange(0, W) * anchor_size).reshape((1, W))
        shift_y = (T.arange(0, H) * anchor_size).reshape((1, H))
        shift_x = T.tile(shift_x, (H, 1))
        shift_y = T.tile(shift_y.T, (1, W))
        shifts = T.stack([
            shift_x.ravel(),
            shift_y.ravel(),
            shift_x.ravel(),
            shift_y.ravel()
        ]).T
        # Enumerate all shifted anchors:
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = n_anchors
        K = shifts.shape[0]
        anchors = anchors.reshape((1, A, 4)) + shifts.reshape((K, 1, 4))
        anchors = anchors.reshape((K * A, 4))
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.dimshuffle((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the object scores:
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = object_probs.dimshuffle((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, self.im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = filter_boxes(proposals, min_size * self.im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        order = scores.ravel().argsort()[::-1]

        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 2000)
        # 8. return the top proposals (-> RoIs top)
        keep, self.updates = nms(T.concatenate([proposals, scores], axis=1),
                                 nms_thresh)
        keep = keep[:topN]
        self.proposals = proposals[keep, :]
        self.scores = scores[keep]

        self.outputs = [self.proposals, self.scores]
        # self.output_size = [self.cls_score.output_size, self.bbox_pred.output_size]

        self.params = {}
        self.params.update(p_dict("rpn_conv/3x3_", self.conv))
        self.params.update(p_dict("rpn_cls_score_", self.cls_score))
        self.params.update(p_dict("rpn_bbox_pred_", self.bbox_pred))