Example #1
def get_roidb(imdb_name):
    imdb = get_imdb(imdb_name)
    print('--------------------------------------------------------')
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
    roidb = get_training_roidb(imdb, imdb_name)
    return roidb
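
# Usage sketch (an assumption -- it presumes cfg_from_file has already been called
# and that the dataset name is registered with the imdb factory):
#   roidb = get_roidb('voc_2007_trainval')
#   print('{} roidb entries loaded'.format(len(roidb)))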
Example #2
def extract_roidb(imdb_names, training=True):
    """
    Combine multiple roidbs
    """

    def get_training_roidb(imdb, imdb_name):
        """Returns a roidb (Region of Interest database) for use in training."""
        if cfg.TRAIN.USE_FLIPPED and 'train' in imdb_name:
            print('Appending horizontally-flipped training examples...')
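            # accessing imdb.roidb here forces the lazy roidb property to be
            # built before the flipped entries are appended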
            imdb.roidb
            imdb.append_flipped_images()
            print('{} images loaded after flipping images'.format(len(imdb.roidb)))

        prepare_roidb(imdb)
        # ratio_index = rank_roidb_ratio(imdb)
        return imdb.roidb

    def get_roidb(imdb_name):
        imdb = get_imdb(imdb_name)
        print('--------------------------------------------------------')
        print('Loaded dataset `{:s}` for training'.format(imdb.name))
        imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
        roidb = get_training_roidb(imdb, imdb_name)
        return roidb

    roidbs = [get_roidb(s) for s in imdb_names.split('+')]
    roidb = roidbs[0]

    if len(roidbs) > 1:
        for r in roidbs[1:]:
            roidb.extend(r)
        tmp = get_imdb(imdb_names.split('+')[1])
        imdb = datasets.imdb.imdb(imdb_names, tmp.classes)
    else:
        imdb = get_imdb(imdb_names)

    #if training:
    #    roidb = filter_roidb(roidb)

    ratio_list, ratio_index = rank_roidb_ratio(roidb)


    return imdb, roidb, ratio_list, ratio_index
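
# A minimal usage sketch for extract_roidb (hedged: the dataset names are
# placeholders and are assumed to be registered with the imdb factory):
#   imdb, roidb, ratio_list, ratio_index = extract_roidb('voc_2007_trainval+voc_2012_trainval')
#   # names joined with '+' are loaded separately and their roidbs concatenated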
def test():
    import os
    img_file = 'demo/images.jpeg'
    image = cv2.imread(img_file)

    #imdb_name = 'CaltechPedestrians_train'
    imdb_name = 'coco_2017_train'
    #imdb_name = 'voc_2007_trainval'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    #pre_model_name = 'VGGnet_fast_rcnn_iter_70000.h5'
    pre_model_name = 'coco_2017_train_10_vgg16_0.7_b1.h5'
    #pre_model_name = 'CaltechPedestrians_train_1_vgg16_0.7_b1.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)
    print(imdb.classes)
    if 'vgg16' in pre_model_name.split('_'):
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'resnet50' in pre_model_name.split('_'):
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()

    dets, scores, classes = detector.detect(image,
                                            blob,
                                            thr=0.7,
                                            nms_thresh=0.3)
    runtime = t.toc()
    print('total spend: {}s'.format(runtime))

    im2show = np.copy(image)
    for i, det in enumerate(dets):
        det = tuple(int(x) for x in det)
        cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15),\
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)

    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
    cv2.imshow('demo', im2show)
    cv2.waitKey(0)
Example #4
    def __init__(self, vcoco_set, coco_root, vcoco_root):
        super(RoiVCocoBoxes, self).__init__(vcoco_set, coco_root)

        # TODO this sets a global config, which I prefer not to do. But the
        # faster_rcnn code depends on it.
        cf.cfg_from_list(["DATA_DIR", vcoco_root])

        if vcoco_set == "vcoco_train":
            coco_split = "train"
        elif vcoco_set == "vcoco_val":
            coco_split = "val"
        else:
            raise ValueError("Invalid vcoco_set '%s'" % vcoco_set)
        imdb_name = "coco_2014_" + coco_split
        self._imdb = get_imdb(imdb_name)
        rdl_roidb.prepare_roidb(self._imdb)
        self._roidb = self._imdb.roidb

        self.cocoimgid_2_roidbindex = {
            index: i
            for i, index in enumerate(self._imdb._image_index)
        }
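
        # Usage sketch (an assumption, not part of the original class): map a
        # COCO image id back to its roidb entry via the lookup built above, e.g.
        #   entry = self._roidb[self.cocoimgid_2_roidbindex[coco_img_id]]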
Example #5
resume = False
# ------------

if rand_seed is not None:
    np.random.seed(rand_seed)

# load config
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
imdb = get_imdb(imdb_name)
rdl_roidb.prepare_roidb(imdb)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# load net
net = RFCN(classes=imdb.classes, debug=_DEBUG)
#init_modules = [net.rpn.conv1, net.rpn.score_conv, net.rpn.bbox_conv, net.fc6, net.fc7, net.score_fc, net.bbox_fc]
#network.weights_normal_init(init_modules, dev=0.01)
network.weights_normal_init(net, dev=0.01)
network.load_pretrained_npy(net, pretrained_model)
if resume:
    pretrained_model_file = 'models/saved_model3/faster_rcnn_resnet101_20000.h5'
    network.load_net(pretrained_model_file, net)
    start_step = 20000
    print('Resume training...')
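
# A hedged sketch of how the hyperparameters loaded above are typically consumed;
# torch.optim.SGD is an assumption here, the original script builds its optimizer
# outside this excerpt:
#   params = [p for p in net.parameters() if p.requires_grad]
#   optimizer = torch.optim.SGD(params, lr=lr, momentum=momentum,
#                               weight_decay=weight_decay)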
# ------------

if rand_seed is not None:
    np.random.seed(rand_seed)

# load config
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
imdb_0 = get_imdb(imdb_name_0)
imdb_1 = get_imdb(imdb_name_1)
rdl_roidb.prepare_roidb(imdb_0)
rdl_roidb.prepare_roidb(imdb_1)
roidb_0 = imdb_0.roidb
roidb_1 = imdb_1.roidb
data_layer = RoIDataLayer(roidb_0, roidb_1, imdb_0.num_classes)

# In[5]:

# load net
net = FasterRCNN_y(classes=imdb_0.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.01)
network.load_pretrained_npy_y(net, pretrained_model)
# model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5'
# model_file = 'models/saved_model3/faster_rcnn_60000.h5'
def test():
    import os
    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_10_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)

    if 'vgg16' in pre_model_name.split('_'):
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'res' in pre_model_name.split('_'):
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)

    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')
    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()
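    # video_file, output_file, and fps are module-level settings defined outside
    # this excerpt (an assumption based on how they are used below)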
    cap = cv2.VideoCapture(video_file)
    init = True
    while (cap.isOpened()):
        ret, frame = cap.read()
        if ret:
            p = Timer()
            p.tic()
            if init:
                cnt = 1
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_file, fourcc, fps,
                                      (frame.shape[1], frame.shape[0]))
                init = False
            try:
                dets, scores, classes = detector.detect(frame,
                                                        blob,
                                                        thr=0.7,
                                                        nms_thresh=0.3)
                frame = np.copy(frame)
                for i, det in enumerate(dets):
                    det = tuple(int(x) for x in det)
                    cv2.rectangle(frame, det[0:2], det[2:4], (255, 205, 51), 2)
                    # cv2.putText(frame, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15), \
                    #             cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
                cv2.imshow('demo', frame)
                cv2.waitKey(1000)
                cv2.destroyAllWindows()
            except IndexError:
                pass
            finally:
                print(cnt, '-frame : {:.3f}s'.format(p.toc()))
                cnt += 1
                out.write(frame)
        else:
            break
    runtime = t.toc()
    print('{} frames  /  total spend: {}s  /  {:2.1f} fps'.format(
        cnt, int(runtime), cnt / runtime))
    cap.release()
    out.release()
Example #8
        # if vis:
        #     cv2.imshow('test', im2show)
        #     cv2.waitKey(1)
        if sav:
            cv2.imwrite(output_dir_detections + str(i) + '.png', im2show)

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb_0.evaluate_detections(all_boxes, output_dir)


if __name__ == '__main__':

    imdb_0 = get_imdb(imdb_test_name_0)
    imdb_0.competition_mode(on=True)
    net_0 = FasterRCNN(classes=imdb_0.classes, debug=False)
    network.load_net(trained_model_0, net_0)
    print('load model 0 successfully!')
    net_0.cuda()
    net_0.eval()

    imdb_1 = get_imdb(imdb_test_name_1)
    imdb_1.competition_mode(on=True)
    net_1 = FasterRCNN(classes=imdb_1.classes, debug=False)
    network.load_net(trained_model_1, net_1)
    print('load model 1 successfully!')
    net_1.cuda()
    net_1.eval()
rand_seed = 1024

save_name = 'vg'
max_per_image = 300
thresh = 0.05
# ------------

if rand_seed is not None:
    np.random.seed(rand_seed)

# load config
cfg_from_file(cfg_file)

for imdb_fn in (
    'vg',
    'vg_val',
    ):
    imdb = get_imdb(imdb_fn)
    imdb.competition_mode(on=True)

    net = FasterRCNN(classes=imdb.classes, debug=True)
    network.load_net(trained_model, net)

    net.cuda()
    net.eval()
    print('load model successfully!')

    get_preds(imdb_fn + '_out', net, imdb, max_per_image,
        thresh=thresh, test_bbox_reg=cfg.TEST.BBOX_REG, vis=False)

print("DONE!")
# ------------

if rand_seed is not None:
    np.random.seed(rand_seed)

# load config
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
imdb = get_imdb(imdb_name)  # this is a pascal_voc object!

# prepare_roidb post-processes imdb's roidb so it is easier to use for training:
# to each roidb dict produced by pascal_voc it adds "image" (the image path),
# "width", "height", "max_overlaps" and "max_classes"
rdl_roidb.prepare_roidb(imdb)
roidb = imdb.roidb  # a list of per-image dicts

# RoIDataLayer generates the training data, playing the role of a DataLoader in
# PyTorch: it transforms the raw images so they can be used for training
data_layer = RoIDataLayer(roidb, imdb.num_classes)
# The forward method returns one batch of image data and, at the same time,
# applies the following operations to the images:
# 1. The original image is rescaled for training, e.g. (356, 500, 3) ==> (600, 843, 3).
#    The config sets the shortest training side to 600, so the image is resized
#    while keeping its aspect ratio and the scale is recorded -- here
#    scale = 600 / 356 = 1.68539 -- and the GT boxes are multiplied by the same scale.

# =============================================
# The blobs that are finally returned form a dict:
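# (a hedged sketch -- the exact keys come from this repo's minibatch code, so
#  treat the list below as an assumption rather than a guarantee):
#   blobs = data_layer.forward()
#   blobs['data']      # the rescaled image batch
#   blobs['gt_boxes']  # ground-truth boxes scaled by the same factor, with class labels
#   blobs['im_info']   # [height, width, scale] of the rescaled image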
        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i + 1, num_images, detect_time, nms_time))

        if vis:
            cv2.imshow('test', im2show)
            cv2.waitKey(1)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('=====Evaluating detections=====')  # evaluate the detections and compute mAP
    imdb.evaluate_detections(all_boxes, output_dir)


if __name__ == '__main__':
    # load data
    imdb = get_imdb(imdb_name)  # returns a pascal_voc object
    imdb.competition_mode(on=True)
    print("load data finished!")

    # load net
    net = FasterRCNN(classes=imdb.classes, debug=False)
    network.load_net(trained_model, net)  # load the trained model
    print('load model successfully!')

    # net.cuda()
    net.eval()

    # evaluation
    test_net(save_name, net, imdb, max_per_image, thresh=thresh, vis=vis)
Example #12
# ------------

if rand_seed is not None:
    np.random.seed(rand_seed)

# load config
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
imdb = get_imdb(imdb_name, img_set)
rdl_roidb.prepare_roidb(imdb)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)
print "number of classes: " + str(imdb.num_classes)
# load net
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.01)
network.load_pretrained_npy(net, pretrained_model)
# model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5'
# model_file = 'models/saved_model3/faster_rcnn_60000.h5'
# network.load_net(model_file, net)
# exp_name = 'vgg16_02-19_13-24'
# start_step = 60001
# lr /= 10.
# network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01)
Example #13
rand_seed = 1024

save_name = 'vg'
max_per_image = 300
thresh = 0.05
# ------------

if rand_seed is not None:
    np.random.seed(rand_seed)

# load config
cfg_from_file(cfg_file)

# load data
imdb = get_imdb('vg_val_small')
imdb.competition_mode(on=True)

# load net
net = FasterRCNN(classes=imdb.classes, debug=True)
network.load_net(trained_model, net)
print('load model successfully!')

net.cuda()
net.eval()

# # evaluation
test_net(save_name,
         net,
         imdb,
         max_per_image,
Example #14
def track():
    def id_track(dataset, features):
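        # id_track matches every current feature to its nearest stored feature
        # in each buffered frame and assigns the id that wins the majority vote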
        from collections import Counter
        def dist(f1, f2):
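            # note: sqrt((f1 - f2) ** 2) is element-wise |f1 - f2|, so this sums
            # absolute differences (an L1 distance) rather than a Euclidean one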
            score = (torch.sqrt((f1 - f2) ** 2)).sum(0).data.cpu().numpy()[0]
            return score

        id_list = []
        id_count = {'f' + str(i): [] for i in range(len(features))}
        for dataframe in dataset:
            for i, f in enumerate(features):
                init_val = 1e15
                for data in dataframe:
                    score = dist(f, data['feature'])
                    if score < init_val:
                        init_val = score
                        id = data['id']
                id_count['f' + str(i)].append(id)
        for ids in id_count.values():
            c1 = Counter(ids)
            most_id = c1.most_common(1)[0][0]
            id_list.append(most_id)
        return id_list
    import os
    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_2_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)
    name_blocks = pre_model_name.split('_')
    if 'vgg16' in name_blocks:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'resnet50' in name_blocks:
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    relu = 'relu' in name_blocks
    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()
    cap = cv2.VideoCapture(video_file)
    init = True
    while (cap.isOpened()):
        ret, frame = cap.read()
        if ret:
            p = Timer()
            p.tic()

            if init:
                cnt = 1
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_file, fourcc, fps, (frame.shape[1], frame.shape[0]))
                init = False
            try:
                # detect
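                # tps (frames per tracking window) is defined outside this
                # excerpt; tid is the current frame's position within that window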
                tid = (cnt-1) % tps
                dets, scores, classes = detector.detect(frame, blob, thr=0.7, nms_thresh=0.3)
                frame = np.copy(frame)
                # feature extraction
                features = []
                for i, det in enumerate(dets):
                    gt_box = det[np.newaxis,:]
                    features.append(detector.extract_feature_vector(frame, blob, gt_box, relu=relu))
                    det = tuple(int(x) for x in det)
                    cv2.rectangle(frame, det[0:2], det[2:4], (255, 205, 51), 2)
                dataframe = []
                if tid == 0:
                    dataset = []
                    for i, f in enumerate(features):
                        data = {}
                        data['id'] = i
                        data['feature'] = f
                        dataframe.append(data)
                    dataset.append(dataframe)
                    anchors = dets
                elif tid > 0 and tid < tps-1:
                    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                                             np.ascontiguousarray(dets, dtype=np.float))
                    # overlaps is (K anchors) x (N dets); zero out pairs with IoU <= 0.7,
                    # then pick the best-overlapping anchor index for each detection
                    overlaps = np.multiply(overlaps, overlaps > 0.7)
                    max_arg = overlaps.argmax(axis=0)
                    for i, arg in enumerate(max_arg):
                        if arg >= len(features):
                            continue
                        data = {}
                        data['id'] = arg
                        data['feature'] = features[arg]
                        dataframe.append(data)
                    dataset.append(dataframe)
                    anchors = dets
                else:
                    id_list = id_track(dataset, features)
                    for i, id in enumerate(id_list):
                        det = tuple(int(x)-2 for x in dets[i])
                        cv2.putText(frame, 'id: ' + str(id), det[0:2], cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255))
                    # cv2.imshow('demo', frame)
                    # cv2.waitKey(1000)
                    # cv2.destroyAllWindows()
            except Exception:
                pass
            finally:
                if cnt % 10 == 0:
                    print(cnt,'-frame : {:.3f}s'.format(p.toc()))
                cnt += 1
                out.write(frame)
        else:
            break
    runtime = t.toc()
    print('{} frames  /  total spend: {}s  /  {:2.1f} fps'.format(cnt, int(runtime), cnt/runtime))
    cap.release()
    out.release()