Пример #1
0
def id_match_test(model, detector, imdb, roidb):
    """Measure ID-matching accuracy: for each set of test images, the pair of
    features with the smallest distance should belong to the same identity.

    Args:
        model: checkpoint path; '_'-separated tokens select the metric
            ('euc'/'log' -> distance, 'cls' -> cosine) and ReLU usage.
        detector: network exposing extract_feature_vector(); moved to CUDA here.
        imdb: dataset object providing num_triplet_test_images.
        roidb: list of dicts with 'image', 'boxes', 'ids' keys.

    Returns:
        Match precision as a percentage (float).
    """
    from torch.nn.functional import cosine_similarity

    def dist(f1, f2):
        # NOTE(review): sqrt(x**2) == |x| element-wise, so this is an L1
        # distance, not Euclidean — kept as-is to preserve behavior.
        return (torch.sqrt((f1 - f2) ** 2)).sum(0).data.cpu().numpy()

    def cos_sim(f1, f2):
        # Cosine *distance*: 1 - cosine similarity.
        return 1.0 - cosine_similarity(f1, f2, dim=0).data.cpu().numpy()

    detector.cuda()
    detector.eval()
    name_blocks = model.split('_')
    if 'euc' in name_blocks or 'log' in name_blocks:
        val_func = dist
    elif 'cls' in name_blocks:
        val_func = cos_sim
    else:
        val_func = dist
    relu = 'relu' in name_blocks
    print('Test ID Match with ', model.split('/')[-1])

    match = 0
    batch_size = imdb.num_triplet_test_images
    test_num = len(roidb)
    blob = init_data(is_cuda=True)
    num_set = test_num // batch_size
    for i in range(num_set):
        features = []
        for k in range(batch_size):
            pt = batch_size * i + k
            image = cv2.imread(roidb[pt]['image'])
            gt_boxes = roidb[pt]['boxes'].astype(np.float32)
            features.append(
                detector.extract_feature_vector(image,
                                                blob,
                                                gt_boxes,
                                                relu=relu))
        # Find the closest pair of features within this set.
        init_val = 1e15
        # BUGFIX: min_m/min_n were unbound (NameError) when batch_size < 2.
        min_m, min_n = 0, 0
        for m in range(batch_size):
            for n in range(m + 1, batch_size):
                val = val_func(features[m], features[n])
                if val < init_val:
                    init_val = val
                    min_m, min_n = m, n
        if roidb[batch_size * i + min_m]['ids'] == \
                roidb[batch_size * i + min_n]['ids']:
            match += 1
        if (i + 1) % 10 == 0:
            # BUGFIX: i + 1 sets have been processed; dividing by i was an
            # off-by-one that overstated the running accuracy.
            done = i + 1
            print('------------{:d}   {:.2f}%'.format(done * batch_size,
                                                      match / done * 100))
    print('\tPrecision: %.2f%%, ' % (match / num_set * 100), model)
    return match / num_set * 100
Пример #2
0
def score_analysis(model, detector, imdb, roidb):
    """Average feature distances for anchor-positive pairs (same id),
    negative pairs (different ids) and negative-vs-background pairs.

    Args:
        model: checkpoint path; a 'relu' token in its '_'-split name enables
            ReLU on extracted features.
        detector: network exposing extract_feature_vector() and
            extract_background_features(); moved to CUDA here.
        imdb: dataset object providing num_triplet_test_images.
        roidb: list of dicts with 'image', 'boxes', 'ids' keys.

    Returns:
        Tuple (pos_score, neg_score, bg_score) of per-set averages.
    """
    def dist(f1, f2):
        # NOTE(review): sqrt(x**2) == |x| element-wise — an L1-style distance.
        return (torch.sqrt((f1 - f2) ** 2)).sum(0).data.cpu().numpy()

    detector.cuda()
    detector.eval()
    name_blocks = model.split('_')
    # Hoisted out of the image loop: loop-invariant.
    relu = 'relu' in name_blocks
    print('Anchor-Positive, Negative Score Analysis ', model.split('/')[-1])

    pos_score = 0.
    neg_score = 0.
    bg_score = 0.
    batch_size = imdb.num_triplet_test_images
    test_num = len(roidb)
    blob = init_data(is_cuda=True)
    num_set = test_num // batch_size
    for i in range(num_set):
        features = []
        bg_features = []
        for k in range(batch_size):
            pt = batch_size * i + k
            image = cv2.imread(roidb[pt]['image'])
            gt_boxes = roidb[pt]['boxes'].astype(np.float32)
            features.append(
                detector.extract_feature_vector(image,
                                                blob,
                                                gt_boxes,
                                                relu=relu))
            bg_features.append(
                detector.extract_background_features(image,
                                                     blob,
                                                     gt_boxes,
                                                     relu=relu))

        for m in range(batch_size):
            for n in range(m + 1, batch_size):
                if roidb[batch_size * i + m]['ids'] == \
                        roidb[batch_size * i + n]['ids']:
                    pos_score += dist(features[m], features[n])
                else:
                    neg_score += dist(features[m], features[n])
                    bg_score += dist(features[m], bg_features[n])
        if (i + 1) % 500 == 0:
            # BUGFIX: `x / 2*i` parsed as (x/2)*i — the intended denominator
            # is 2*(sets done); also i+1 sets are done here, not i.
            done = i + 1
            print('------------{:d}  pos: {:.4f} neg: {:.4f} bg: {:.4f}'.format(
                done * batch_size, pos_score / done,
                neg_score / (2 * done), bg_score / (2 * done)))
    pos_score /= num_set
    neg_score /= 2 * num_set
    bg_score /= 2 * num_set
    return pos_score, neg_score, bg_score
Пример #3
0
def test():
    """Run the detector on one demo image, draw boxes/scores, save and show."""
    import os

    img_file = 'demo/images.jpeg'
    image = cv2.imread(img_file)

    #imdb_name = 'CaltechPedestrians_train'
    imdb_name = 'coco_2017_train'
    #imdb_name = 'voc_2007_trainval'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    #pre_model_name = 'VGGnet_fast_rcnn_iter_70000.h5'
    pre_model_name = 'coco_2017_train_10_vgg16_0.7_b1.h5'
    #pre_model_name = 'CaltechPedestrians_train_1_vgg16_0.7_b1.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)
    print(imdb.classes)

    # Choose the backbone from the checkpoint name; VGG16 is the fallback.
    tokens = pre_model_name.split('_')
    if 'vgg16' in tokens:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'resnet50' in tokens:
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)

    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    timer = Timer()
    timer.tic()
    dets, scores, classes = detector.detect(image, blob,
                                            thr=0.7, nms_thresh=0.3)
    runtime = timer.toc()
    print('total spend: {}s'.format(runtime))

    im2show = np.copy(image)
    # Draw each detection with its class label and confidence.
    for det, cls_name, score in zip(dets, classes, scores):
        box = tuple(int(v) for v in det)
        cv2.rectangle(im2show, box[0:2], box[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, '%s: %.3f' % (cls_name, score),
                    (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)

    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
    cv2.imshow('demo', im2show)
    cv2.waitKey(0)
Пример #4
0
def test(model, detector, imdb, roidb):
    """Evaluate detection precision/recall over a roidb at IoU > 0.5.

    Args:
        model: checkpoint path (printed in the progress line only).
        detector: network exposing detect(); moved to CUDA here.
        imdb: dataset object providing _class_to_ind.
        roidb: list of dicts with 'image', 'boxes', 'gt_classes' keys.

    Returns:
        Tuple (precision_pct, recall_pct).
    """
    detector.cuda()
    detector.eval()

    print('Test Detection Performance with ', model.split('/')[-1])
    blob = init_data(is_cuda=True)
    npos, tp, fp = 0, 0, 0
    test_num = len(roidb)
    for i in range(test_num):
        gt_boxes = roidb[i]['boxes']
        gt_classes = roidb[i]['gt_classes']
        image = process_img_by_lib(roidb[i]['image'])
        npos += len(gt_boxes)
        try:
            dets, scores, classes = detector.detect(image,
                                                    blob,
                                                    thr=0.7,
                                                    nms_thresh=0.3)
            # dets : N x 4, gt_boxes : K x 4 -> overlaps : N x K IoU matrix.
            # BUGFIX: np.float alias was removed in NumPy 1.24 — use float64.
            overlaps = bbox_overlaps(
                np.ascontiguousarray(dets, dtype=np.float64),
                np.ascontiguousarray(gt_boxes, dtype=np.float64))
            # Best-matching gt box (and its IoU) for each detection.
            candidates = overlaps.argmax(axis=1)
            ovmax = overlaps.max(axis=1)
            for k, arg in enumerate(candidates):
                detected_class = imdb._class_to_ind[classes[k]]
                # True positive only when IoU > 0.5 AND the class matches.
                if ovmax[k] > 0.5 and detected_class == gt_classes[arg]:
                    tp += 1
                else:
                    fp += 1
        except Exception:
            # BUGFIX: was a bare `except:` — still best-effort skipping of a
            # failing image, but no longer swallows KeyboardInterrupt.
            pass

        # BUGFIX: guard zero denominators — if the first detections fail,
        # fp + tp (or npos) can be 0 and the progress line raised
        # ZeroDivisionError.
        precision = tp / (fp + tp) * 100 if (fp + tp) else 0.0
        recall = tp / npos * 100 if npos else 0.0
        sys.stdout.write('Eval {:d}/{:d} Precision : {:.2f}, Recall : {:.2f}, Model : {:s}\r'
                         .format(i + 1, test_num, precision, recall, model))
        sys.stdout.flush()
    precision = tp / (fp + tp) * 100 if (fp + tp) else 0.0
    recall = tp / npos * 100 if npos else 0.0
    print('\tPrecision: %.2f%%, Recall: %.2f%%\n' % (precision, recall), model)
    return precision, recall
Пример #5
0
def test():
    """Run pedestrian detection over a video, writing annotated frames out.

    Reads module-level globals (not visible in this chunk — confirm):
    `video_file`, `output_file`, `fps`.
    """
    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_10_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)

    if 'vgg16' in pre_model_name.split('_'):
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'res' in pre_model_name.split('_'):
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)

    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')
    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()
    cap = cv2.VideoCapture(video_file)
    # BUGFIX: cnt/out were created only inside the loop; a video that yields
    # no frames made the final print and out.release() raise NameError.
    cnt = 1
    out = None
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        p = Timer()
        p.tic()
        # Lazily create the writer once the first frame's size is known.
        if out is None:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(output_file, fourcc, fps,
                                  (frame.shape[1], frame.shape[0]))
        try:
            dets, scores, classes = detector.detect(frame,
                                                    blob,
                                                    thr=0.7,
                                                    nms_thresh=0.3)
            frame = np.copy(frame)
            for i, det in enumerate(dets):
                det = tuple(int(x) for x in det)
                cv2.rectangle(frame, det[0:2], det[2:4], (255, 205, 51), 2)
                # cv2.putText(frame, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15), \
                #             cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
            cv2.imshow('demo', frame)
            cv2.waitKey(1000)
            cv2.destroyAllWindows()
        except IndexError:
            # Best-effort: a frame with no usable detections is still written.
            pass
        finally:
            print(cnt, '-frame : {:.3f}s'.format(p.toc()))
            cnt += 1
            out.write(frame)
    runtime = t.toc()
    # BUGFIX: cnt ends one past the number of frames processed (off-by-one).
    frames = cnt - 1
    print('{} frames  /  total spend: {}s  /  {:2.1f} fps'.format(
        frames, int(runtime), frames / runtime if runtime else 0.0))
    cap.release()
    if out is not None:
        out.release()
# --- Script fragment: load a triplet-trained detector for feature analysis ---
output_dir = 'models/saved_model3'
pre_model_name = 'CaltechPedestrians_train_triplet_1_vgg16_cls_0.7_b3.h5'
# NOTE(review): model_dir, cfg_file and imdb_name are defined earlier in the
# file (not visible in this chunk) — confirm they exist before this point.
pretrained_model = model_dir + pre_model_name

_DEBUG = False
BG_SHOW = True
# Show fewer identities when background features are displayed as well.
id_limit = 20 if BG_SHOW else 50
# load config
cfg_from_file(cfg_file)
# load data
imdb, roidb, ratio_list, ratio_index = extract_roidb(imdb_name)

detector = FasterRCNN_VGG(classes=imdb.classes, debug=_DEBUG)
network.load_net(pretrained_model, detector)

blob = init_data(is_cuda=True)

detector.cuda()
detector.eval()
name_blocks = pre_model_name.split('_')
batch_size = imdb.num_triplet_test_images
test_num = len(roidb)
# NOTE(review): duplicate of the init_data call above — likely redundant.
blob = init_data(is_cuda=True)
features = []
bg_features = []
ids = []
print('Extracting features...')
t = Timer()
t.tic()
# NOTE(review): the body of this loop is truncated in this chunk; the rest of
# the feature-extraction code is not visible here.
for i in range(test_num):
    image = cv2.imread(roidb[i]['image'])
Пример #7
0
def track():
    """Detect pedestrians in a video and track identities across windows of
    `tps` frames, writing the annotated video to `output_file`.

    Reads module-level globals (not visible in this chunk — confirm):
    `video_file`, `output_file`, `fps`, `tps`.
    """
    def id_track(dataset, features):
        """Vote over all buffered frames for the most likely id per feature."""
        from collections import Counter

        def dist(f1, f2):
            # L1-style distance. NOTE(review): the original indexed [0],
            # assuming a 1-element array (pre-0.4 torch); ravel() keeps that
            # working for 0-d arrays from modern torch too.
            return (torch.sqrt((f1 - f2) ** 2)).sum(0).data.cpu().numpy().ravel()[0]

        id_list = []
        id_count = {'f' + str(i): [] for i in range(len(features))}
        for dataframe in dataset:
            for i, f in enumerate(features):
                best_val = 1e15
                # BUGFIX: the chosen id was unbound (NameError) when a
                # dataframe was empty; -1 marks "no match".
                best_id = -1
                for data in dataframe:
                    score = dist(f, data['feature'])
                    if score < best_val:
                        best_val = score
                        best_id = data['id']
                id_count['f' + str(i)].append(best_id)
        # BUGFIX: renamed loop variables that shadowed builtins `list`/`id`.
        for votes in id_count.values():
            most_id = Counter(votes).most_common(1)[0][0]
            id_list.append(most_id)
        return id_list

    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_2_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)
    name_blocks = pre_model_name.split('_')
    if 'vgg16' in name_blocks:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'resnet50' in name_blocks:
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    relu = 'relu' in name_blocks
    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()
    cap = cv2.VideoCapture(video_file)
    # BUGFIX: cnt/out were created only inside the loop; an empty video made
    # the final print and out.release() raise NameError.
    cnt = 1
    out = None
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        p = Timer()
        p.tic()
        # Lazily create the writer once the first frame's size is known.
        if out is None:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(output_file, fourcc, fps,
                                  (frame.shape[1], frame.shape[0]))
        try:
            # detect
            tid = (cnt - 1) % tps  # position of this frame within the window
            dets, scores, classes = detector.detect(frame, blob,
                                                    thr=0.7, nms_thresh=0.3)
            frame = np.copy(frame)
            # feature extraction per detection box
            features = []
            for i, det in enumerate(dets):
                gt_box = det[np.newaxis, :]
                features.append(
                    detector.extract_feature_vector(frame, blob, gt_box,
                                                    relu=relu))
                det = tuple(int(x) for x in det)
                cv2.rectangle(frame, det[0:2], det[2:4], (255, 205, 51), 2)
            dataframe = []
            if tid == 0:
                # First frame of the window: assign fresh ids in order.
                dataset = []
                for i, f in enumerate(features):
                    dataframe.append({'id': i, 'feature': f})
                dataset.append(dataframe)
                anchors = dets
            elif 0 < tid < tps - 1:
                # Middle frames: propagate ids from the previous frame's
                # boxes via IoU (> 0.7) matching.
                # BUGFIX: np.float alias removed in NumPy 1.24 — use float64.
                overlaps = bbox_overlaps(
                    np.ascontiguousarray(anchors, dtype=np.float64),
                    np.ascontiguousarray(dets, dtype=np.float64))
                overlaps = np.multiply(overlaps, overlaps > 0.7)
                max_arg = overlaps.argmax(axis=0)
                for i, arg in enumerate(max_arg):
                    if arg >= len(features):
                        continue
                    # BUGFIX: store the i-th detection's own feature under
                    # the matched anchor id `arg` (was features[arg], which
                    # indexed features by anchor index).
                    dataframe.append({'id': arg, 'feature': features[i]})
                dataset.append(dataframe)
                anchors = dets
            else:
                # Last frame of the window: vote over the buffer, draw ids.
                id_list = id_track(dataset, features)
                for i, pid in enumerate(id_list):
                    det = tuple(int(x) - 2 for x in dets[i])
                    cv2.putText(frame, 'id: ' + str(pid), det[0:2],
                                cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255))
                # cv2.imshow('demo', frame)
                # cv2.waitKey(1000)
                # cv2.destroyAllWindows()
        except Exception:
            # BUGFIX: was a bare `except:` — still best-effort per frame,
            # but no longer swallows KeyboardInterrupt/SystemExit.
            pass
        finally:
            if cnt % 10 == 0:
                print(cnt, '-frame : {:.3f}s'.format(p.toc()))
            cnt += 1
            out.write(frame)
    runtime = t.toc()
    # BUGFIX: cnt ends one past the number of frames processed (off-by-one).
    frames = cnt - 1
    print('{} frames  /  total spend: {}s  /  {:2.1f} fps'.format(
        frames, int(runtime), frames / runtime if runtime else 0.0))
    cap.release()
    if out is not None:
        out.release()