def id_match_test(model, detector, imdb, roidb):
    """Measure identity-matching precision over triplet test sets.

    For each set of ``batch_size`` images, extracts one feature vector per
    image, finds the closest pair under the metric selected by the model
    filename, and counts a match when that pair shares the same ``'ids'``.

    Args:
        model: checkpoint path; '_'-separated tokens select the metric
            ('euc'/'log' -> distance, 'cls' -> cosine) and 'relu' activation.
        detector: network exposing ``extract_feature_vector``.
        imdb: dataset wrapper providing ``num_triplet_test_images``.
        roidb: roi records with 'image', 'boxes' and 'ids' keys.

    Returns:
        Matching precision as a percentage.
    """
    from torch.nn.functional import cosine_similarity

    def dist(f1, f2):
        # NOTE(review): sqrt is applied element-wise *before* the sum, so this
        # is the L1 (absolute) distance, not the Euclidean norm — confirm
        # whether 'euc' models really intend L1 here.
        val = (torch.sqrt((f1 - f2)**2)).sum(0).data.cpu().numpy()
        return val

    def cos_sim(f1, f2):
        # Cosine *distance* (1 - similarity): smaller means more similar,
        # consistent with dist() above.
        val = 1.0 - cosine_similarity(f1, f2, dim=0).data.cpu().numpy()
        return val

    detector.cuda()
    detector.eval()
    name_blocks = model.split('_')
    # Metric selection from the checkpoint filename convention.
    if 'euc' in name_blocks or 'log' in name_blocks:
        val_func = dist
    elif 'cls' in name_blocks:
        val_func = cos_sim
    else:
        val_func = dist
    relu = 'relu' in name_blocks
    print('Test ID Match with ', model.split('/')[-1])
    match = 0
    batch_size = imdb.num_triplet_test_images
    test_num = len(roidb)
    blob = init_data(is_cuda=True)
    num_set = int(test_num / batch_size)
    for i in range(num_set):
        features = []
        for k in range(batch_size):
            pt = batch_size * i + k
            image = cv2.imread(roidb[pt]['image'])
            gt_boxes = roidb[pt]['boxes'].astype(np.float32)
            features.append(
                detector.extract_feature_vector(image, blob, gt_boxes,
                                                relu=relu))
        # Find the closest pair within the set.
        best_val = 1e15
        # FIX: track the pair explicitly — the old code left min_m/min_n
        # unbound (NameError) whenever batch_size < 2.
        best_pair = None
        for m in range(batch_size):
            for n in range(m + 1, batch_size):
                val = val_func(features[m], features[n])
                if val < best_val:
                    best_val = val
                    best_pair = (m, n)
        if best_pair is not None:
            min_m, min_n = best_pair
            if roidb[batch_size * i + min_m]['ids'] == \
                    roidb[batch_size * i + min_n]['ids']:
                match += 1
        if (i + 1) % 10 == 0:
            # FIX: i + 1 sets are complete at this point; the old code divided
            # by i and reported i * batch_size processed images.
            print('------------{:d} {:.2f}%'.format(
                (i + 1) * batch_size, match / (i + 1) * 100))
    print('\tPrecision: %.2f%%, ' % (match / num_set * 100), model)
    return match / num_set * 100
def score_analysis(model, detector, imdb, roidb):
    """Average feature distances for anchor-positive, negative, and
    foreground-background pairs over the triplet test sets.

    Args:
        model: checkpoint path; a 'relu' token in the '_'-separated filename
            selects ReLU feature extraction.
        detector: network exposing ``extract_feature_vector`` and
            ``extract_background_features``.
        imdb: dataset wrapper providing ``num_triplet_test_images``.
        roidb: roi records with 'image', 'boxes' and 'ids' keys.

    Returns:
        Tuple ``(pos_score, neg_score, bg_score)`` of averaged distances.
    """
    def dist(f1, f2):
        # NOTE(review): element-wise sqrt of a square is abs(), so this is the
        # L1 distance, not the Euclidean norm — confirm intent.
        score = (torch.sqrt((f1 - f2)**2)).sum(0).data.cpu().numpy()
        return score

    detector.cuda()
    detector.eval()
    name_blocks = model.split('_')
    print('Anchor-Positive, Negative Score Analysis ', model.split('/')[-1])
    pos_score = 0.
    neg_score = 0.
    bg_score = 0.
    batch_size = imdb.num_triplet_test_images
    test_num = len(roidb)
    blob = init_data(is_cuda=True)
    num_set = int(test_num / batch_size)
    # Hoisted: constant for the whole run (was recomputed for every image).
    relu = 'relu' in name_blocks
    for i in range(num_set):
        features = []
        bg_features = []
        for k in range(batch_size):
            pt = batch_size * i + k
            image = cv2.imread(roidb[pt]['image'])
            gt_boxes = roidb[pt]['boxes'].astype(np.float32)
            features.append(
                detector.extract_feature_vector(image, blob, gt_boxes,
                                                relu=relu))
            bg_features.append(
                detector.extract_background_features(image, blob, gt_boxes,
                                                     relu=relu))
        for m in range(batch_size):
            for n in range(m + 1, batch_size):
                if roidb[batch_size * i + m]['ids'] == \
                        roidb[batch_size * i + n]['ids']:
                    pos_score += dist(features[m], features[n])
                else:
                    neg_score += dist(features[m], features[n])
                    bg_score += dist(features[m], bg_features[n])
        if (i + 1) % 500 == 0 and i > 0:
            # FIX: `x / 2*i` parses as `(x / 2) * i`; parenthesize so the
            # running averages match the final `/= 2 * num_set` below.
            print('------------{:d} pos: {:.4f} neg: {:.4f} bg: {:.4f}'
                  .format(i * batch_size, pos_score / i,
                          neg_score / (2 * i), bg_score / (2 * i)))
    pos_score /= num_set
    neg_score /= 2 * num_set
    bg_score /= 2 * num_set
    return pos_score, neg_score, bg_score
def test():
    """Single-image detection demo: run the detector on one image, draw the
    boxes and class scores, save and display the result."""
    import os
    img_file = 'demo/images.jpeg'
    frame = cv2.imread(img_file)
    #imdb_name = 'CaltechPedestrians_train'
    imdb_name = 'coco_2017_train'
    #imdb_name = 'voc_2007_trainval'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    #pre_model_name = 'VGGnet_fast_rcnn_iter_70000.h5'
    pre_model_name = 'coco_2017_train_10_vgg16_0.7_b1.h5'
    #pre_model_name = 'CaltechPedestrians_train_1_vgg16_0.7_b1.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)
    print(imdb.classes)
    # Pick the backbone from tokens in the checkpoint filename; anything
    # unrecognized falls back to VGG.
    name_tokens = pre_model_name.split('_')
    if 'vgg16' in name_tokens:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'resnet50' in name_tokens:
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')
    blob = init_data(is_cuda=True)
    timer = Timer()
    timer.tic()
    dets, scores, classes = detector.detect(frame, blob, thr=0.7,
                                            nms_thresh=0.3)
    runtime = timer.toc()
    print('total spend: {}s'.format(runtime))
    # Draw on a copy so the original image stays untouched.
    im2show = np.copy(frame)
    for idx, det in enumerate(dets):
        box = tuple(int(v) for v in det)
        cv2.rectangle(im2show, box[0:2], box[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, '%s: %.3f' % (classes[idx], scores[idx]),
                    (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
    cv2.imshow('demo', im2show)
    cv2.waitKey(0)
def test(model, detector, imdb, roidb):
    """Evaluate detection precision and recall of *detector* over *roidb*.

    A detection counts as a true positive when its best-overlapping ground
    truth box has IoU > 0.5 and the predicted class matches; everything else
    is a false positive.

    Args:
        model: checkpoint path (used for progress display only).
        detector: network exposing ``detect``.
        imdb: dataset wrapper providing ``_class_to_ind``.
        roidb: roi records with 'image', 'boxes' and 'gt_classes' keys.

    Returns:
        Tuple ``(precision, recall)`` as percentages.
    """
    def _safe_rates():
        # FIX: guard zero denominators — the old code raised
        # ZeroDivisionError when the first detect() call failed (tp+fp == 0)
        # or when roidb contained no ground-truth boxes (npos == 0).
        precision = tp / (fp + tp) * 100 if (fp + tp) > 0 else 0.0
        recall = tp / npos * 100 if npos > 0 else 0.0
        return precision, recall

    detector.cuda()
    detector.eval()
    print('Test Detection Performance with ', model.split('/')[-1])
    blob = init_data(is_cuda=True)
    npos, tp, fp = 0, 0, 0
    test_num = len(roidb)
    for i in range(test_num):
        gt_boxes = roidb[i]['boxes']
        gt_classes = roidb[i]['gt_classes']
        image = process_img_by_lib(roidb[i]['image'])
        npos += len(gt_boxes)
        try:
            dets, scores, classes = detector.detect(image, blob, thr=0.7,
                                                    nms_thresh=0.3)
            # dets : N x 4, gt_boxes : K x 4 -> overlaps : N x K IoU matrix.
            # FIX: np.float was removed in NumPy 1.24; np.float64 is the
            # dtype it aliased.
            overlaps = bbox_overlaps(
                np.ascontiguousarray(dets, dtype=np.float64),
                np.ascontiguousarray(gt_boxes, dtype=np.float64))
            # Best-matching ground-truth box per detection.
            candidates = overlaps.argmax(axis=1)
            ovmax = overlaps.max(axis=1)
            for k, arg in enumerate(candidates):
                detected_class = imdb._class_to_ind[classes[k]]
                if ovmax[k] > 0.5 and detected_class == gt_classes[arg]:
                    tp += 1
                else:
                    fp += 1
        except Exception:
            # FIX: was a bare `except:` (also swallowed KeyboardInterrupt /
            # SystemExit). Kept as best-effort skip of frames that fail.
            pass
        precision, recall = _safe_rates()
        sys.stdout.write(
            'Eval {:d}/{:d} Precision : {:.2f}, Recall : {:.2f}, Model : {:s}\r'
            .format(i + 1, test_num, precision, recall, model))
        sys.stdout.flush()
    precision, recall = _safe_rates()
    print('\tPrecision: %.2f%%, Recall: %.2f%%\n' % (precision, recall),
          model)
    return precision, recall
def test():
    """Video detection demo: run the detector frame-by-frame on
    ``video_file`` and write annotated frames to ``output_file``.

    NOTE(review): ``video_file``, ``output_file`` and ``fps`` are module-level
    names defined outside this function — verify they exist before calling.
    """
    import os
    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_10_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)
    # Backbone selection from tokens in the checkpoint filename.
    if 'vgg16' in pre_model_name.split('_'):
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'res' in pre_model_name.split('_'):
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')
    blob = init_data(is_cuda=True)
    t = Timer()
    t.tic()
    cap = cv2.VideoCapture(video_file)
    init = True
    while (cap.isOpened()):
        ret, frame = cap.read()
        if ret:
            p = Timer()  # per-frame timer
            p.tic()
            if init:
                # Lazily create the writer on the first frame so its size
                # matches the actual frame dimensions.
                cnt = 1
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_file, fourcc, fps,
                                      (frame.shape[1], frame.shape[0]))
                init = False
            try:
                dets, scores, classes = detector.detect(frame, blob, thr=0.7,
                                                        nms_thresh=0.3)
                frame = np.copy(frame)
                for i, det in enumerate(dets):
                    det = tuple(int(x) for x in det)
                    cv2.rectangle(frame, det[0:2], det[2:4],
                                  (255, 205, 51), 2)
                    # cv2.putText(frame, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15), \
                    #             cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
                # NOTE(review): nesting reconstructed from mangled source —
                # display once per frame, after all boxes are drawn.
                cv2.imshow('demo', frame)
                cv2.waitKey(1000)
                cv2.destroyAllWindows()
            except IndexError as e:
                # Frames where detection yields nothing usable are skipped.
                pass
            finally:
                # The (possibly annotated) frame is always written out.
                print(cnt, '-frame : {:.3f}s'.format(p.toc()))
                cnt += 1
                out.write(frame)
        else:
            break
    runtime = t.toc()
    print('{} frames / total spend: {}s / {:2.1f} fps'.format(
        cnt, int(runtime), cnt / runtime))
    cap.release()
    out.release()
# --- Top-level script fragment: load a triplet-trained detector and extract
# per-image (and background) features from the test roidb.
# NOTE(review): this chunk references names defined outside the visible span
# (model_dir, cfg_file, imdb_name, extract_roidb, ...), and the for-loop at
# the end appears to continue beyond this chunk — confirm against the full
# file before editing.
output_dir = 'models/saved_model3'
pre_model_name = 'CaltechPedestrians_train_triplet_1_vgg16_cls_0.7_b3.h5'
pretrained_model = model_dir + pre_model_name
_DEBUG = False
BG_SHOW = True
# Fewer identities are visualized when background features are also shown.
id_limit = 20 if BG_SHOW else 50
# load config
cfg_from_file(cfg_file)
# load data
imdb, roidb, ratio_list, ratio_index = extract_roidb(imdb_name)
detector = FasterRCNN_VGG(classes=imdb.classes, debug=_DEBUG)
network.load_net(pretrained_model, detector)
blob = init_data(is_cuda=True)
detector.cuda()
detector.eval()
name_blocks = pre_model_name.split('_')
batch_size = imdb.num_triplet_test_images
test_num = len(roidb)
# NOTE(review): blob is re-initialized here; the earlier init_data call above
# appears redundant — confirm.
blob = init_data(is_cuda=True)
features = []
bg_features = []
ids = []
print('Extracting features...')
t = Timer()
t.tic()
for i in range(test_num):
    image = cv2.imread(roidb[i]['image'])
def track():
    """Video pedestrian-tracking demo: detect per frame, extract a feature
    vector per detection, associate detections across a window of ``tps``
    frames, and annotate the final frame of each window with majority-vote
    identities.

    NOTE(review): ``video_file``, ``output_file``, ``fps`` and ``tps`` are
    module-level names defined outside this function — verify before calling.
    """
    def id_track(dataset, features):
        """For each current feature, vote over all stored frames for the
        nearest stored feature's id; return the most common id per feature."""
        from collections import Counter

        def dist(f1, f2):
            # NOTE(review): element-wise sqrt of a square is abs(), so this
            # is the L1 distance, not the Euclidean norm — confirm intent.
            score = (torch.sqrt((f1 - f2) ** 2)).sum(0).data.cpu().numpy()[0]
            return score

        id_list = []
        id_count = {'f' + str(i): [] for i in range(len(features))}
        for dataframe in dataset:
            for i, f in enumerate(features):
                init_val = 1e15
                for data in dataframe:
                    score = dist(f, data['feature'])
                    if score < init_val:
                        init_val = score
                        id = data['id']
                # Nearest stored id in this frame becomes one vote.
                id_count['f' + str(i)].append(id)
        for list in id_count.values():  # NOTE(review): shadows builtin `list`
            c1 = Counter(list)
            most_id = c1.most_common(1)[0][0]
            id_list.append(most_id)
        return id_list

    import os
    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_2_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)
    name_blocks = pre_model_name.split('_')
    # Backbone selection from tokens in the checkpoint filename.
    if 'vgg16' in name_blocks:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'resnet50' in name_blocks:
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    relu = True if 'relu' in name_blocks else False
    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')
    blob = init_data(is_cuda=True)
    t = Timer()
    t.tic()
    cap = cv2.VideoCapture(video_file)
    init = True
    while (cap.isOpened()):
        ret, frame = cap.read()
        if ret:
            p = Timer()  # per-frame timer
            p.tic()
            if init:
                # Lazily create the writer on the first frame so its size
                # matches the actual frame dimensions.
                cnt = 1
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_file, fourcc, fps,
                                      (frame.shape[1], frame.shape[0]))
                init = False
            try:
                # detect
                # tid indexes the current frame within the tps-frame window:
                # 0 starts a window, the last frame triggers id assignment.
                tid = (cnt-1) % tps
                dets, scores, classes = detector.detect(frame, blob, thr=0.7,
                                                        nms_thresh=0.3)
                frame = np.copy(frame)
                # feature extraction
                features = []
                for i, det in enumerate(dets):
                    gt_box = det[np.newaxis, :]
                    features.append(
                        detector.extract_feature_vector(frame, blob, gt_box,
                                                        relu=relu))
                    det = tuple(int(x) for x in det)
                    cv2.rectangle(frame, det[0:2], det[2:4],
                                  (255, 205, 51), 2)
                dataframe = []
                if tid == 0:
                    # First frame of a window: every detection gets a new id.
                    dataset = []
                    for i, f in enumerate(features):
                        data = {}
                        data['id'] = i
                        data['feature'] = f
                        dataframe.append(data)
                    dataset.append(dataframe)
                    anchors = dets
                elif tid > 0 and tid < tps-1:
                    # Middle frames: carry ids forward via IoU with the
                    # previous frame's boxes (anchors).
                    overlaps = bbox_overlaps(
                        np.ascontiguousarray(anchors, dtype=np.float),
                        np.ascontiguousarray(dets, dtype=np.float))
                    # Zero out weak matches, then take the best anchor per
                    # detection column.
                    overlaps = np.multiply(overlaps, overlaps > 0.7)
                    max_arg = overlaps.argmax(axis=0)
                    for i, arg in enumerate(max_arg):
                        if arg >= len(features):
                            continue
                        data = {}
                        data['id'] = arg
                        data['feature'] = features[arg]
                        dataframe.append(data)
                    dataset.append(dataframe)
                    anchors = dets
                else:
                    # Last frame of the window: vote ids from the stored
                    # frames and draw them.
                    id_list = id_track(dataset, features)
                    for i, id in enumerate(id_list):
                        det = tuple(int(x)-2 for x in dets[i])
                        cv2.putText(frame, 'id: ' + str(id), det[0:2],
                                    cv2.FONT_HERSHEY_PLAIN, 2.0,
                                    (0, 0, 255))
                    # cv2.imshow('demo', frame)
                    # cv2.waitKey(1000)
                    # cv2.destroyAllWindows()
            except:
                # NOTE(review): bare except silently swallows everything,
                # including KeyboardInterrupt — consider narrowing.
                pass
            finally:
                # The (possibly annotated) frame is always written out.
                if cnt % 10 == 0:
                    print(cnt, '-frame : {:.3f}s'.format(p.toc()))
                cnt += 1
                out.write(frame)
        else:
            break
    runtime = t.toc()
    print('{} frames / total spend: {}s / {:2.1f} fps'.format(
        cnt, int(runtime), cnt/runtime))
    cap.release()
    out.release()