def eval_seq(opt,
             save_dir='/Users/ecom-v.ramesh/Documents/Personal/2020/DL/Trackjectory/output',
             frames_glob='/Users/ecom-v.ramesh/Desktop/kabadi/frames/frames2/*.png',
             init_rbox=(695, 250, 885, 250, 695, 570, 885, 570)):
    """Run the SiamRPN++ single-object tracker over an image sequence.

    For every frame the tracked bounding box is drawn and the annotated
    frame is written to ``save_dir`` as ``<frame_index>.png``.  The average
    tracking speed (fps) is printed at the end.

    Args:
        opt: options object; ``opt.single_track_load_model`` is the path of
            the pretrained SiamRPN++ weights loaded into the network.
        save_dir: directory that receives the annotated output frames.
            FIX: the original ignored this parameter and wrote to a second
            hard-coded absolute path instead.
        frames_glob: glob pattern selecting the input frames (sorted
            lexicographically — assumes zero-padded names; TODO confirm).
        init_rbox: 8-tuple (x1,y1,...,x4,y4) rotated box marking the target
            in the first frame.

    Raises:
        ValueError: if ``frames_glob`` matches no files.
    """
    import os

    # Build and load the tracker network (CPU mode; .cuda() intentionally off).
    net = SiamRPNPP()
    load_net(net, opt.single_track_load_model)
    net.eval()  # .cuda()

    image_files = sorted(glob.glob(frames_glob))
    if not image_files:
        raise ValueError('no frames matched {!r}'.format(frames_glob))
    os.makedirs(save_dir, exist_ok=True)

    # Convert the rotated init box to an axis-aligned (cx, cy, w, h) box.
    cx, cy, w, h = get_axis_aligned_bbox(list(init_rbox))
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])

    # Initialise tracker state on the first frame (HxWxC BGR image).
    im = cv2.imread(image_files[0])
    state = SiamRPN_init(im, target_pos, target_sz, net)

    # Track through the sequence, timing only the tracker itself.
    toc = 0
    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)
        tic = cv2.getTickCount()
        state = SiamRPN_track(state, im)  # track
        toc += cv2.getTickCount() - tic

        # (x, y, w, h) rectangle from the tracker's centre/size estimate.
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        x, y, bw, bh = (int(v) for v in res)
        cv2.rectangle(im, (x, y), (x + bw, y + bh), (0, 255, 255), 3)
        cv2.imwrite(os.path.join(save_dir, '{}.png'.format(f)), im)

    # Exclude the first (init) frame from the speed figure; guard the
    # single-frame / zero-elapsed edge cases that would divide by zero.
    elapsed = toc / cv2.getTickFrequency()
    if len(image_files) > 1 and elapsed > 0:
        print('Tracking Speed {:.1f}fps'.format((len(image_files) - 1) / elapsed))
def main():
    """Evaluate the joint VGG-verb / top-down-verb / CAQ imSitu model.

    Parses command-line options, builds the three pretrained sub-models,
    loads their weights, and reports top-1/top-5 verb+value scores on the
    dev set (``--evaluate``) or the test set (``--test``).
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument('--output_dir', type=str, default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--resume_training', action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='', help='The model we resume')
    parser.add_argument('--evaluate', action='store_true', help='Only use the testing mode')
    parser.add_argument('--evaluate_rare', action='store_true', help='Only use the testing mode')
    parser.add_argument('--test', action='store_true', help='Only use the testing mode')
    parser.add_argument('--dataset_folder', type=str, default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir', type=str, default='./resized_256',
                        help='Location of original images')
    parser.add_argument('--train_file', default="train_freq2000.json", type=str,
                        help='trainfile name')
    parser.add_argument('--dev_file', default="dev_freq2000.json", type=str,
                        help='dev file name')
    parser.add_argument('--test_file', default="test_freq2000.json", type=str,
                        help='test file name')
    parser.add_argument('--model_saving_name', type=str,
                        help='saving name of the outpul model')
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--model', type=str, default='vgg_caq_joint')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--clip_norm', type=float, default=0.25)
    parser.add_argument('--num_workers', type=int, default=3)
    parser.add_argument('--vgg_verb_model', type=str, default='',
                        help='Pretrained vgg verb model')
    parser.add_argument('--tda_verb_model', type=str, default='',
                        help='Pretrained topdown verb model')
    parser.add_argument('--caq_model', type=str, default='',
                        help='Pretrained CAQ model')
    args = parser.parse_args()

    batch_size = args.batch_size
    n_worker = args.num_workers
    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    # FIX: use context managers so the JSON file handles are closed
    # (the original json.load(open(...)) leaked them).
    with open(dataset_folder + '/' + args.train_file) as fh:
        train_set = json.load(fh)
    # The encoder (vocabularies, transforms) is always built from the
    # training annotations, even in evaluate/test mode.
    encoder = imsitu_encoder.imsitu_encoder(train_set)
    train_set = imsitu_loader.imsitu_loader(imgset_folder, train_set, encoder,
                                            'train', encoder.train_transform)

    # Assemble the three sub-models, then the joint evaluation model.
    constructor = 'build_vgg_verb_classifier'
    vgg_verb_model = getattr(vgg_verb_classifier, constructor)(len(encoder.verb_list))

    constructor = 'build_top_down_baseline'
    role_module = getattr(top_down_baseline_addemb, constructor)(
        encoder.get_num_roles(), encoder.get_num_verbs(),
        encoder.get_num_labels(), encoder)

    constructor = 'build_top_down_verb'
    tda_verb_model = getattr(top_down_verb, constructor)(
        encoder.get_num_labels(), encoder.get_num_verbs(), role_module)

    constructor = 'build_top_down_baseline'
    tda_role_module = getattr(top_down_baseline, constructor)(
        encoder.get_num_roles(), encoder.get_num_verbs(),
        encoder.get_num_labels(), encoder)

    constructor = 'build_top_down_query_context'
    caq_model = getattr(top_down_query_context, constructor)(
        encoder.get_num_roles(), encoder.get_num_verbs(),
        encoder.get_num_labels(), encoder, tda_role_module)

    constructor = 'build_%s' % args.model
    model = getattr(revgg_caq_joint_eval, constructor)(
        vgg_verb_model, tda_verb_model, caq_model)

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + '/' + args.dev_file) as fh:
        dev_set = json.load(fh)
    dev_set = imsitu_loader.imsitu_loader(imgset_folder, dev_set, encoder,
                                          'val', encoder.dev_transform)
    dev_loader = torch.utils.data.DataLoader(
        dev_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + '/' + args.test_file) as fh:
        test_set = json.load(fh)
    test_set = imsitu_loader.imsitu_loader(imgset_folder, test_set, encoder,
                                           'test', encoder.dev_transform)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    # FIX: makedirs(exist_ok=True) also creates missing parents, unlike mkdir.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir, exist_ok=True)

    torch.manual_seed(args.seed)
    if args.gpuid >= 0:
        model.cuda()
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True

    # load models
    utils.load_net(args.vgg_verb_model, [model.vgg_model])
    print('successfully loaded vgg_verb_model!')
    utils.load_net(args.tda_verb_model, [model.tda_model])
    print('successfully loaded tda_verb_model!')
    utils.load_net(args.caq_model, [model.caq_model])
    print('successfully loaded caq_model!')

    def _avg_score(top1_avg, top5_avg):
        # Mean over the 8 standard imSitu metrics (3 top-1 + 5 top-5).
        return (top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"]
                + top5_avg["verb"] + top5_avg["value"] + top5_avg["value-all"]
                + top5_avg["value*"] + top5_avg["value-all*"]) / 8

    if args.evaluate:
        top1, top5, val_loss = eval(model, dev_loader, encoder, args.gpuid,
                                    write_to_file=True)
        top1_avg = top1.get_average_results()
        top5_avg = top5.get_average_results()
        avg_score = _avg_score(top1_avg, top5_avg)
        print('Dev average :{:.2f} {} {}'.format(
            avg_score * 100,
            utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))
    elif args.test:
        top1, top5, val_loss = eval(model, test_loader, encoder, args.gpuid,
                                    write_to_file=True)
        top1_avg = top1.get_average_results()
        top5_avg = top5.get_average_results()
        avg_score = _avg_score(top1_avg, top5_avg)
        print('Test average :{:.2f} {} {}'.format(
            avg_score * 100,
            utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))
def main():
    """Train or evaluate the top-down-baseline imSitu situation recognizer.

    NOTE(review): this re-defines ``main`` and shadows the earlier joint-eval
    ``main`` in the same file — likely two scripts were pasted together;
    confirm and split into separate entry points.

    With ``--evaluate``/``--test`` it reports top-1/top-5 noun metrics on the
    dev/test split; otherwise it trains (optionally resuming from
    ``--resume_model``).
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument('--output_dir', type=str, default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--resume_training', action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='', help='The model we resume')
    parser.add_argument('--evaluate', action='store_true', help='Only use the testing mode')
    parser.add_argument('--evaluate_visualize', action='store_true',
                        help='Only use the testing mode to visualize ')
    parser.add_argument('--evaluate_rare', action='store_true', help='Only use the testing mode')
    parser.add_argument('--test', action='store_true', help='Only use the testing mode')
    parser.add_argument('--dataset_folder', type=str, default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir', type=str, default='./resized_256',
                        help='Location of original images')
    parser.add_argument('--train_file', default="train_freq2000.json", type=str,
                        help='trainfile name')
    parser.add_argument('--dev_file', default="dev_freq2000.json", type=str,
                        help='dev file name')
    parser.add_argument('--test_file', default="test_freq2000.json", type=str,
                        help='test file name')
    parser.add_argument('--model_saving_name', type=str,
                        help='saving name of the outpul model')
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--model', type=str, default='top_down_baseline')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--clip_norm', type=float, default=0.25)
    parser.add_argument('--num_workers', type=int, default=3)
    args = parser.parse_args()

    n_epoch = args.epochs
    batch_size = args.batch_size
    clip_norm = args.clip_norm
    n_worker = args.num_workers
    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    # FIX: use context managers so the JSON file handles are closed
    # (the original json.load(open(...)) leaked them).
    with open(dataset_folder + '/' + args.train_file) as fh:
        train_set = json.load(fh)
    # Encoder (vocabularies, transforms) is always derived from the train split.
    encoder = imsitu_encoder.imsitu_encoder(train_set)
    train_set = imsitu_loader.imsitu_loader(imgset_folder, train_set, encoder,
                                            'train', encoder.train_transform)

    constructor = 'build_%s' % args.model
    model = getattr(top_down_baseline, constructor)(
        encoder.get_num_roles(), encoder.get_num_verbs(),
        encoder.get_num_labels(), encoder)

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + '/' + args.dev_file) as fh:
        dev_set = json.load(fh)
    dev_set = imsitu_loader.imsitu_loader(imgset_folder, dev_set, encoder,
                                          'val', encoder.dev_transform)
    dev_loader = torch.utils.data.DataLoader(
        dev_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + '/' + args.test_file) as fh:
        test_set = json.load(fh)
    test_set = imsitu_loader.imsitu_loader(imgset_folder, test_set, encoder,
                                           'test', encoder.dev_transform)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    # FIX: makedirs(exist_ok=True) also creates missing parents, unlike mkdir.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir, exist_ok=True)

    torch.manual_seed(args.seed)
    if args.gpuid >= 0:
        model.cuda()
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True

    if args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            raise Exception('[pretrained module] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer = torch.optim.Adamax(model.parameters(), lr=1e-3)
        model_name = 'resume_all'
    else:
        print('Training from the scratch.')
        model_name = 'train_full'
        utils.set_trainable(model, True)
        # Lower LR for the pretrained convnet backbone; default 1e-3 for the
        # freshly initialised heads.
        optimizer = torch.optim.Adamax(
            [{'params': model.convnet.parameters(), 'lr': 5e-5},
             {'params': model.role_emb.parameters()},
             {'params': model.verb_emb.parameters()},
             {'params': model.query_composer.parameters()},
             {'params': model.v_att.parameters()},
             {'params': model.q_net.parameters()},
             {'params': model.v_net.parameters()},
             {'params': model.classifier.parameters()}],
            lr=1e-3)

    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    def _avg_score(top1_avg, top5_avg):
        # Mean over the 8 standard imSitu metrics (3 top-1 + 5 top-5).
        return (top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"]
                + top5_avg["verb"] + top5_avg["value"] + top5_avg["value-all"]
                + top5_avg["value*"] + top5_avg["value-all*"]) / 8

    if args.evaluate:
        top1, top5, val_loss = eval(model, dev_loader, encoder, args.gpuid)
        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()
        avg_score = _avg_score(top1_avg, top5_avg)
        print('Dev average :{:.2f} {} {}'.format(
            avg_score * 100,
            utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))
    elif args.test:
        top1, top5, val_loss = eval(model, test_loader, encoder, args.gpuid)
        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()
        avg_score = _avg_score(top1_avg, top5_avg)
        print('Test average :{:.2f} {} {}'.format(
            avg_score * 100,
            utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))
    else:
        print('Model training started!')
        train(model, train_loader, dev_loader, optimizer, scheduler, n_epoch,
              args.output_dir, encoder, args.gpuid, clip_norm, model_name,
              args.model_saving_name)
optimizer = torch.optim.Adamax(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr) torch.backends.cudnn.benchmark = True #if resuming, load it on cpu or GPUs if len(args.resume_model) > 1: print('Resume training from: {}'.format(args.resume_model)) if torch.cuda.is_available(): device = torch.device('cuda') else: device = torch.device('cpu') path_to_model = pjoin(args.saving_folder, args.resume_model) checkpoint = torch.load(path_to_model, map_location=device) utils.load_net(path_to_model, [model]) if torch.cuda.is_available(): for parameter in model.module.convnet_verbs.parameters(): parameter.requires_grad = False model.module.convnet_verbs.model.fc.requires_grad = True for parameter in model.module.convnet_nouns.parameters(): parameter.requires_grad = False model.module.convnet_nouns.model.fc.requires_grad = True else: for parameter in model.convnet_verbs.parameters(): parameter.requires_grad = False model.convnet_verbs.model.fc.requires_grad = True for parameter in model.convnet_nouns.parameters():