def test():
    """Detect objects in the demo image and display the annotated result.

    A hard-coded COCO-2017 checkpoint is loaded, moved to the GPU and run once
    on 'demo/images.jpeg'. Every detection is drawn as a rectangle with a
    "<class>: <score>" caption; the annotated copy is written to
    'demo/out.jpg' and shown in a 'demo' window until a key is pressed.
    """
    import os

    image = cv2.imread('demo/images.jpeg')

    # The dataset only supplies the class list for the detector head.
    # Alternatives used previously: 'CaltechPedestrians_train',
    # 'voc_2007_trainval'.
    imdb = get_imdb('coco_2017_train')

    # Alternatives used previously: 'VGGnet_fast_rcnn_iter_70000.h5',
    # 'CaltechPedestrians_train_1_vgg16_0.7_b1.h5'.
    checkpoint_name = 'coco_2017_train_10_vgg16_0.7_b1.h5'
    checkpoint_path = 'data/pretrained_model/' + checkpoint_name

    cfg_from_file('experiments/cfgs/faster_rcnn_end2end.yml')
    print(imdb.classes)

    # The backbone is encoded as one of the '_'-separated tokens of the
    # checkpoint name; VGG is both the first choice and the fallback.
    name_tokens = checkpoint_name.split('_')
    wants_resnet = 'vgg16' not in name_tokens and 'resnet50' in name_tokens
    detector_cls = FasterRCNN_RES if wants_resnet else FasterRCNN_VGG
    detector = detector_cls(classes=imdb.classes, debug=False)

    network.load_net(checkpoint_path, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    stopwatch = Timer()
    stopwatch.tic()
    dets, scores, classes = detector.detect(image, blob, thr=0.7,
                                            nms_thresh=0.3)
    elapsed = stopwatch.toc()
    print('total spend: {}s'.format(elapsed))

    # Draw on a copy so the loaded image itself stays untouched.
    canvas = np.copy(image)
    for idx, raw_box in enumerate(dets):
        box = tuple(map(int, raw_box))
        caption = '%s: %.3f' % (classes[idx], scores[idx])
        caption_origin = (box[0], box[1] + 15)
        cv2.rectangle(canvas, box[0:2], box[2:4], (255, 205, 51), 2)
        cv2.putText(canvas, caption, caption_origin,
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)

    cv2.imwrite(os.path.join('demo', 'out.jpg'), canvas)
    cv2.imshow('demo', canvas)
    cv2.waitKey(0)
def __init__(self, classes, n_action_classes, n_action_nonagent_roles,
             **kwargs):
    """Assemble the three branches of the HOI model.

    Args:
        classes: object classes handed to ``FasterRCNN``; its length must
            equal ``cf.cfg["NCLASSES"]`` once the optional config overrides
            below have been applied.
        n_action_classes: forwarded to ``HumanCentricBranch``.
        n_action_nonagent_roles: forwarded to ``HumanCentricBranch`` and
            ``InteractionBranch``.
        **kwargs: optional Faster R-CNN configuration overrides:
            ``faster_rcnn_config`` is passed to ``cf.cfg_from_file`` and
            ``faster_rcnn_command_line`` to ``cf.cfg_from_list``, in that
            order. Either is skipped when absent or ``None``.

    Raises:
        AssertionError: if ``cf.cfg["NCLASSES"]`` differs from
            ``len(classes)``.
    """
    super(HoiModel, self).__init__()
    # Call form of print: same output on Python 2, and valid on Python 3
    # (the statement form is a SyntaxError there).
    print("Constructing HOI Model")

    faster_rcnn_config = kwargs.get("faster_rcnn_config", None)
    if faster_rcnn_config is not None:
        cf.cfg_from_file(faster_rcnn_config)

    faster_rcnn_cle = kwargs.get("faster_rcnn_command_line", None)
    if faster_rcnn_cle is not None:
        cf.cfg_from_list(faster_rcnn_cle)

    # Checked after both overrides so that the final configuration is what
    # gets validated against the class list.
    assert cf.cfg["NCLASSES"] == len(classes), \
        "inconsistent FasterRCNN settings"

    self.detection_branch = FasterRCNN(classes=classes)
    self.human_centric_branch = HumanCentricBranch(
        n_action_classes, n_action_nonagent_roles)
    self.interaction_branch = InteractionBranch(n_action_nonagent_roles)
def process_img_by_lib(path):
    """Read the image at ``path`` with the configured library.

    The reader is selected by ``cfg.IMAGE_PROCESS_LIB`` after loading
    'experiments/cfgs/faster_rcnn_end2end.yml': ``'cv2'`` uses ``cv2.imread``
    and ``'sci'`` uses ``sciImread``. A 2-D (single-channel) result is
    replicated into three identical channels, and the channel axis of a
    ``'sci'`` image is reversed (RGB -> BGR) to match the cv2 channel order.

    Args:
        path: location of the image file, passed straight to the reader.

    Returns:
        The image array with a trailing channel axis.

    Raises:
        ModuleNotFoundError: if ``cfg.IMAGE_PROCESS_LIB`` is neither
            ``'cv2'`` nor ``'sci'``.
        IOError: if the reader returns ``None`` for ``path``.
    """
    # NOTE(review): the config file is re-read on every call; left as is
    # because other code may rely on the refreshed global cfg.
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    cfg_from_file(cfg_file)

    if cfg.IMAGE_PROCESS_LIB == 'cv2':
        imread = cv2.imread
        RGB2BGR = False
    elif cfg.IMAGE_PROCESS_LIB == 'sci':
        # this is same with PIL
        imread = sciImread
        RGB2BGR = True
    else:
        print("IMAGE PROCESS LIB IS NOT DEFINED")
        raise ModuleNotFoundError("IMAGE PROCESS LIB IS NOT DEFINED")

    im = imread(path)
    if im is None:
        # cv2.imread signals an unreadable file by returning None instead of
        # raising; fail here with the path rather than later on `im.shape`.
        raise IOError('could not read image: {}'.format(path))

    if len(im.shape) == 2:
        # Grayscale: add a channel axis and replicate it three times.
        im = im[:, :, np.newaxis]
        im = np.concatenate((im, im, im), axis=2)

    # flip the channel, since the original one using cv2
    # rgb -> bgr
    if RGB2BGR:
        im = im[:, :, ::-1]
    return im
# ---- schedule / reproducibility ---------------------------------------------
lr_decay_steps = {60000, 80000}
lr_decay = 1. / 10
rand_seed = 1024
_DEBUG = True

# ---- TensorBoard switches ---------------------------------------------------
use_tensorboard = False
remove_all_log = False  # when set, wipe all historical TensorBoard experiments
exp_name = None  # name of the previous TensorBoard experiment

# Seed NumPy's global generator before any data is loaded.
if rand_seed is not None:
    np.random.seed(rand_seed)

# ---- configuration ----------------------------------------------------------
cfg_from_file(cfg_file)
lr, momentum, weight_decay = (
    cfg.TRAIN.LEARNING_RATE,
    cfg.TRAIN.MOMENTUM,
    cfg.TRAIN.WEIGHT_DECAY,
)
disp_interval, log_interval = cfg.TRAIN.DISPLAY, cfg.TRAIN.LOG_IMAGE_ITERS

# ---- data -------------------------------------------------------------------
imdb = VisualGenome(split=0, num_im=50)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# ---- network ----------------------------------------------------------------
# Random-normal initialisation first, then the pretrained weights are loaded
# into the same network.
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.01)
network.load_net(pretrained_model, net)
# ---- schedule / reproducibility ---------------------------------------------
lr_decay_steps = {60000, 80000}
lr_decay = 1. / 10
rand_seed = 1024
_DEBUG = True

# ---- TensorBoard switches ---------------------------------------------------
use_tensorboard = False
remove_all_log = False  # when set, wipe all historical TensorBoard experiments
exp_name = None  # name of the previous TensorBoard experiment

# Seed NumPy's global generator before any data is loaded.
if rand_seed is not None:
    np.random.seed(rand_seed)

# ---- configuration ----------------------------------------------------------
# Values from the file override the default config parameters.
cfg_from_file(cfg_file)
lr, momentum, weight_decay = (
    cfg.TRAIN.LEARNING_RATE,
    cfg.TRAIN.MOMENTUM,
    cfg.TRAIN.WEIGHT_DECAY,
)
disp_interval, log_interval = cfg.TRAIN.DISPLAY, cfg.TRAIN.LOG_IMAGE_ITERS

# ---- data -------------------------------------------------------------------
imdb = get_imdb(imdb_name)
rdl_roidb.prepare_roidb(imdb)  # must run before imdb.roidb is read below
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# ---- network ----------------------------------------------------------------
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.01)
def test():
    """Run the pedestrian detector over a video and save an annotated copy.

    Loads the hard-coded Caltech Pedestrians checkpoint onto the GPU, reads
    frames from ``video_file``, draws one rectangle per detection, shows each
    frame in a 'demo' window (waiting up to one second) and writes it to
    ``output_file``. A per-frame timing line and a final summary are printed.

    Relies on the names ``video_file``, ``output_file`` and ``fps`` being
    defined outside this function.
    """
    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_10_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)

    # The backbone is one of the '_'-separated tokens of the checkpoint name.
    # 'resnet50' is accepted next to the original 'res' token so the check
    # agrees with the sibling demo functions.
    name_blocks = pre_model_name.split('_')
    if 'vgg16' in name_blocks:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'res' in name_blocks or 'resnet50' in name_blocks:
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()
    cap = cv2.VideoCapture(video_file)
    # Both are bound up front so the summary and the cleanup below still work
    # when the capture yields no frame at all.
    out = None
    cnt = 1  # 1-based index of the frame currently being processed
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            p = Timer()
            p.tic()
            if out is None:
                # The writer needs the frame size, so it is created lazily
                # from the first frame.
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_file, fourcc, fps,
                                      (frame.shape[1], frame.shape[0]))
            try:
                dets, scores, classes = detector.detect(
                    frame, blob, thr=0.7, nms_thresh=0.3)
                frame = np.copy(frame)
                # Only boxes are drawn; class/score captions are not rendered
                # in this demo.
                for det in dets:
                    det = tuple(int(x) for x in det)
                    cv2.rectangle(frame, det[0:2], det[2:4],
                                  (255, 205, 51), 2)
                cv2.imshow('demo', frame)
                cv2.waitKey(1000)
                cv2.destroyAllWindows()
            except IndexError:
                # Best effort: presumably raised when a frame yields no
                # detections (TODO confirm). The frame is still written.
                pass
            finally:
                print(cnt, '-frame : {:.3f}s'.format(p.toc()))
                cnt += 1
                out.write(frame)

        runtime = t.toc()
        # cnt points one past the last processed frame.
        n_frames = cnt - 1
        rate = n_frames / runtime if runtime else 0.0
        print('{} frames / total spend: {}s / {:2.1f} fps'.format(
            n_frames, int(runtime), rate))
    finally:
        # Release the capture and the writer even if a frame failed.
        cap.release()
        if out is not None:
            out.release()
def track():
    """Detect pedestrians in a video and label them with voted track ids.

    Frames from ``video_file`` are handled in windows of ``tps`` frames,
    indexed by ``tid = (cnt - 1) % tps``:

    * ``tid == 0``: start a new ``dataset``; every detection gets its own
      index as id and is stored together with its feature vector.
    * ``0 < tid < tps - 1``: link the current detections to the previous
      frame's boxes (``anchors``) through ``bbox_overlaps`` and store them.
    * ``tid == tps - 1``: vote an id for each current detection with
      ``id_track`` and draw it next to the box.

    Every frame, annotated or not, is written to ``output_file``.

    Relies on the names ``video_file``, ``output_file``, ``fps`` and ``tps``
    being defined outside this function.
    """

    def id_track(dataset, features):
        """Vote one id per entry of ``features``.

        Args:
            dataset: list of frames; each frame is a list of dicts with the
                keys 'id' and 'feature'.
            features: feature vectors of the current detections.

        Returns:
            A list aligned with ``features`` holding, for each feature, the
            id that was its nearest neighbour in the most frames, or ``None``
            when no frame offered a candidate.
        """
        from collections import Counter

        def dist(f1, f2):
            # sqrt of the square is the absolute difference; summed over
            # dim 0, then element 0 of the resulting array is taken.
            score = (torch.sqrt((f1 - f2) ** 2)).sum(0).data.cpu().numpy()[0]
            return score

        # One vote list per feature. A list (rather than a dict keyed by
        # 'f<i>') keeps the votes aligned with `features` on every Python
        # version.
        votes = [[] for _ in features]
        for dataframe in dataset:
            for i, f in enumerate(features):
                best_score = 1e15
                best_id = None  # reset per frame so no stale id is reused
                for data in dataframe:
                    score = dist(f, data['feature'])
                    if score < best_score:
                        best_score = score
                        best_id = data['id']
                if best_id is not None:
                    votes[i].append(best_id)

        id_list = []
        for feature_votes in votes:
            winner = Counter(feature_votes).most_common(1)
            id_list.append(winner[0][0] if winner else None)
        return id_list

    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_2_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)

    # Backbone and the 'relu' switch are '_'-separated tokens of the
    # checkpoint name.
    name_blocks = pre_model_name.split('_')
    if 'vgg16' in name_blocks:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'resnet50' in name_blocks:
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    relu = 'relu' in name_blocks
    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()
    cap = cv2.VideoCapture(video_file)
    # Both are bound up front so the summary and the cleanup below still work
    # when the capture yields no frame at all.
    out = None
    cnt = 1  # 1-based index of the frame currently being processed
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            p = Timer()
            p.tic()
            if out is None:
                # The writer needs the frame size, so it is created lazily
                # from the first frame.
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_file, fourcc, fps,
                                      (frame.shape[1], frame.shape[0]))
            try:
                # detect
                tid = (cnt - 1) % tps
                dets, scores, classes = detector.detect(
                    frame, blob, thr=0.7, nms_thresh=0.3)
                frame = np.copy(frame)

                # feature extraction
                # Order is kept from the original: each feature is extracted
                # from the frame as already annotated by earlier boxes.
                features = []
                for det in dets:
                    gt_box = det[np.newaxis, :]
                    features.append(detector.extract_feature_vector(
                        frame, blob, gt_box, relu=relu))
                    box = tuple(int(x) for x in det)
                    cv2.rectangle(frame, box[0:2], box[2:4],
                                  (255, 205, 51), 2)

                # `dataset` and `anchors` are (re)created on the first frame
                # of a window and carried over to the following frames.
                dataframe = []
                if tid == 0:
                    dataset = []
                    for i, f in enumerate(features):
                        dataframe.append({'id': i, 'feature': f})
                    dataset.append(dataframe)
                    anchors = dets
                elif 0 < tid < tps - 1:
                    # np.float64 replaces the alias np.float, which was
                    # removed in NumPy 1.24 (both mean the builtin float).
                    overlaps = bbox_overlaps(
                        np.ascontiguousarray(anchors, dtype=np.float64),
                        np.ascontiguousarray(dets, dtype=np.float64))
                    # max : K max overlaps score about N dets
                    # Scores at or below 0.7 are zeroed before the argmax.
                    overlaps = np.multiply(overlaps, overlaps > 0.7)
                    max_arg = overlaps.argmax(axis=0)
                    # NOTE(review): `arg` is an index along axis 0 of
                    # `overlaps` (presumably the anchors), yet it also
                    # indexes the current `features` - confirm intent.
                    for arg in max_arg:
                        if arg >= len(features):
                            continue
                        dataframe.append(
                            {'id': arg, 'feature': features[arg]})
                    dataset.append(dataframe)
                    anchors = dets
                else:
                    id_list = id_track(dataset, features)
                    for i, track_id in enumerate(id_list):
                        if track_id is None:
                            # No candidate was found for this detection.
                            continue
                        corner = tuple(int(x) - 2 for x in dets[i])
                        cv2.putText(frame, 'id: ' + str(track_id),
                                    corner[0:2], cv2.FONT_HERSHEY_PLAIN,
                                    2.0, (0, 0, 255))
            except Exception:
                # Best effort: a frame that cannot be processed is written
                # as is. Unlike a bare `except:`, KeyboardInterrupt and
                # SystemExit still propagate.
                pass
            finally:
                if cnt % 10 == 0:
                    print(cnt, '-frame : {:.3f}s'.format(p.toc()))
                cnt += 1
                out.write(frame)

        runtime = t.toc()
        # cnt points one past the last processed frame.
        n_frames = cnt - 1
        rate = n_frames / runtime if runtime else 0.0
        print('{} frames / total spend: {}s / {:2.1f} fps'.format(
            n_frames, int(runtime), rate))
    finally:
        # Release the capture and the writer even if a frame failed.
        cap.release()
        if out is not None:
            out.release()