def main():
    """Command-line entry point: train, evaluate or test the role model.

    Parses the command-line flags, builds the encoder, the model and the
    train/dev/test data loaders, optionally loads pretrained weights for the
    selected mode, and then either

    * ``--evaluate``: scores the dev split and dumps ``top1.role_dict``,
      ``top1.value_all_dict`` and ``top1.vall_all_correct`` to JSON files in
      the working directory,
    * ``--test``: scores the test split, or
    * otherwise: starts training through ``train(...)``.

    Raises:
        Exception: if the selected mode needs ``--verb_module`` or
            ``--resume_model`` and that path was not given.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1,
                        help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument('--resume_training', action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='',
                        help='The model we resume')
    parser.add_argument('--verb_module', type=str, default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role', action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--finetune_verb', action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn', action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir', type=str, default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--evaluate', action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--test', action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--dataset_folder', type=str, default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir', type=str, default='./resized_256',
                        help='Location of original images')
    parser.add_argument('--frcnn_feat_dir', type=str,
                        help='Location of output from detectron')

    # todo: train role module separately with gt verbs
    args = parser.parse_args()

    # Hyperparameters forwarded to the optimizer helper and to train().
    lr = 0.0001
    lr_max = 5e-4
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    print('model spec :, top down att with role q ')

    def read_json(path):
        # Use a context manager so the file handle is closed right away.
        with open(path) as handle:
            return json.load(handle)

    def make_loader(dataset, batch_size):
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=n_worker)

    train_set = read_json(dataset_folder + "/train.json")
    imsitu_roleq = read_json("imsitu_data/imsitu_questions.json")
    encoder = imsitu_encoder(train_set, imsitu_roleq)

    model = model_my_new.BaseModel(encoder, args.gpuid)

    # To group up the features
    cnn_features, role_features = utils.group_features_noun(model)

    train_set = imsitu_loader_roleq(imgset_folder, train_set, encoder,
                                    model.train_preprocess())
    train_loader = make_loader(train_set, 64)

    dev_set = read_json(dataset_folder + "/dev.json")
    dev_set = imsitu_loader_roleq(imgset_folder, dev_set, encoder,
                                  model.dev_preprocess())
    dev_loader = make_loader(dev_set, 64)

    test_set = read_json(dataset_folder + "/test.json")
    test_set = imsitu_loader_roleq(imgset_folder, test_set, encoder,
                                   model.dev_preprocess())
    test_loader = make_loader(test_set, 64)

    # Second, smaller-batch loader over the same dev annotations; it is
    # handed to train() alongside dev_loader.
    traindev_set = read_json(dataset_folder + "/dev.json")
    traindev_set = imsitu_loader_roleq(imgset_folder, traindev_set, encoder,
                                       model.dev_preprocess())
    traindev_loader = make_loader(traindev_set, 8)

    utils.set_trainable(model, False)

    def load_pretrained_verb():
        # Shared by the three modes that start from a pretrained verb module.
        if not args.verb_module:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])

    if args.train_role:
        print('CNN fix, Verb fix, train role from the scratch from: {}'.format(
            args.verb_module))
        args.train_all = False
        load_pretrained_verb()
        optimizer_select = 1
        model_name = 'cfx_vfx_rtrain'
    elif args.finetune_verb:
        print('CNN fix, Verb finetune, train role from the scratch from: {}'.
              format(args.verb_module))
        args.train_all = True
        load_pretrained_verb()
        optimizer_select = 2
        model_name = 'cfx_vft_rtrain'
    elif args.finetune_cnn:
        print(
            'CNN finetune, Verb finetune, train role from the scratch from: {}'
            .format(args.verb_module))
        args.train_all = True
        load_pretrained_verb()
        optimizer_select = 3
        model_name = 'cft_vft_rtrain'
    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if not args.resume_model:
            # The message now names the flag that is actually missing.
            raise Exception('[resume model] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    # NOTE(review): the optimizer returned here is replaced by the Adamax
    # instance below. The call is kept because the helper presumably also
    # re-enables gradients on the selected parameter groups after the
    # set_trainable(model, False) above -- confirm before removing it.
    optimizer = utils.get_optimizer_noun(lr, weight_decay, optimizer_select,
                                         cnn_features, role_features)

    # makedirs copes with nested paths and with an already existing directory.
    os.makedirs(args.output_dir, exist_ok=True)

    torch.manual_seed(1234)
    if args.gpuid >= 0:
        model.cuda()
        torch.cuda.manual_seed(1234)
        torch.backends.cudnn.deterministic = True

    optimizer = torch.optim.Adamax([{
        'params': cnn_features,
        'lr': 5e-5
    }, {
        'params': role_features
    }], lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    def score(loader):
        # Run the evaluation and average the eight reported metrics.
        top1, top5, _ = eval(model, loader, encoder, args.gpuid,
                             write_to_file=True)
        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()
        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
            top5_avg["value"] + top5_avg["value-all"] + top5_avg["value*"] + top5_avg["value-all*"]
        avg_score /= 8
        return top1, top1_avg, top5_avg, avg_score

    if args.evaluate:
        top1, top1_avg, top5_avg, avg_score = score(dev_loader)
        print('Dev average :{:.2f} {} {}'.format(
            avg_score * 100, utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))

        # Dump the per-role bookkeeping collected on top1 as JSON files.
        dumps = (
            ('role_pred_data.json', top1.role_dict),
            ('fail_val_all.json', top1.value_all_dict),
            ('pass_val_all.json', top1.vall_all_correct),
        )
        for file_name, payload in dumps:
            with open(file_name, 'w') as fp:
                json.dump(payload, fp, indent=4)

        print('Writing predictions to file completed !')
    elif args.test:
        _, top1_avg, top5_avg, avg_score = score(test_loader)
        print('Test average :{:.2f} {} {}'.format(
            avg_score * 100, utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))
    else:
        print('Model training started!')
        train(model, train_loader, dev_loader, traindev_loader, optimizer,
              scheduler, n_epoch, args.output_dir, encoder, args.gpuid,
              clip_norm, lr_max, model_name, args)
def main():
    """Command-line entry point that trains the relation-network model.

    Parses the command-line flags, builds the encoder, the model and the
    train/dev loaders from the ``imSitu`` annotations, optionally loads
    pretrained weights for the selected mode, builds the optimizer and a
    ``StepLR`` scheduler, and calls ``train(...)``.

    Raises:
        Exception: if the selected mode needs ``--verb_module`` or
            ``--resume_model`` and that path was not given.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1,
                        help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument('--resume_training', action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='',
                        help='The model we resume')
    parser.add_argument('--verb_module', type=str, default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role', action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--finetune_verb', action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn', action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir', type=str, default='./trained_models',
                        help='Location to output the model')

    # todo: train role module separately with gt verbs
    args = parser.parse_args()

    # Hyperparameters for the optimizer helper, the scheduler and train().
    lr = 1e-5
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'

    print(
        'model spec :, 256 hidden, 25 epoch decay, rn_att, 3 layers e-5 init lr decay'
    )

    def read_json(path):
        # Use a context manager so the file handle is closed right away.
        with open(path) as handle:
            return json.load(handle)

    def make_loader(dataset, batch_size):
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=n_worker)

    train_set = read_json(dataset_folder + "/train.json")
    encoder = imsitu_encoder(train_set)

    model = model_vsrl_rnatt.RelationNetworks(encoder, args.gpuid)

    # To group up the features
    cnn_features, verb_features, role_features = utils.group_features(model)

    train_set = imsitu_loader(imgset_folder, train_set, encoder,
                              model.train_preprocess())
    train_loader = make_loader(train_set, 24)

    # NOTE(review): both dev datasets are built with model.train_preprocess();
    # confirm that training-time preprocessing is intended for dev data.
    dev_set = read_json(dataset_folder + "/dev.json")
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder,
                            model.train_preprocess())
    dev_loader = make_loader(dev_set, 24)

    traindev_set = read_json(dataset_folder + "/dev.json")
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder,
                                 model.train_preprocess())
    traindev_loader = make_loader(traindev_set, 8)

    utils.set_trainable(model, False)

    def load_pretrained_verb():
        # Shared by the three modes that start from a pretrained verb module.
        if not args.verb_module:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])

    if args.train_role:
        print('CNN fix, Verb fix, train role from the scratch from: {}'.format(
            args.verb_module))
        args.train_all = False
        load_pretrained_verb()
        optimizer_select = 1
        model_name = 'cfx_vfx_rtrain'
    elif args.finetune_verb:
        print('CNN fix, Verb finetune, train role from the scratch from: {}'.
              format(args.verb_module))
        args.train_all = True
        load_pretrained_verb()
        optimizer_select = 2
        model_name = 'cfx_vft_rtrain'
    elif args.finetune_cnn:
        print(
            'CNN finetune, Verb finetune, train role from the scratch from: {}'
            .format(args.verb_module))
        args.train_all = True
        load_pretrained_verb()
        optimizer_select = 3
        model_name = 'cft_vft_rtrain'
    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if not args.resume_model:
            # The message now names the flag that is actually missing.
            raise Exception('[resume model] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    optimizer = utils.get_optimizer(lr, weight_decay, optimizer_select,
                                    cnn_features, verb_features,
                                    role_features)

    # makedirs copes with nested paths and with an already existing directory.
    os.makedirs(args.output_dir, exist_ok=True)

    if args.gpuid >= 0:
        model.cuda()

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step,
                                                gamma=lr_gamma)

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, optimizer,
          scheduler, n_epoch, args.output_dir, encoder, args.gpuid, clip_norm,
          lr_max, model_name, args)
def main(args=None):
    """Fine-tune selected heads of a pretrained RetinaNet-based model.

    Parses ``args`` (``sys.argv`` when ``None``), loads the dev ground truth
    and the imSitu space JSON files, builds the data loaders and the model,
    loads the checkpoint given by ``--pretrained_model``, enables training
    only for the listed sub-modules, and then alternates ``train`` and
    ``evaluate`` once per epoch while tracking the best evaluation score.

    Args:
        args: optional list of command-line tokens passed to argparse.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument(
        '--train-file',
        help='Path to file containing training annotations (see readme)')
    # The help text used to be a copy of the --train-file description.
    parser.add_argument(
        '--pretrained_model',
        help='Path to the pretrained model checkpoint to load')
    parser.add_argument('--classes-file',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--val-file',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--role-file', help='Path to role file')
    parser.add_argument('--epochs', help='Number of epochs', type=int,
                        default=50)
    parser.add_argument('--title', type=str, default='')
    parser.add_argument("--resume-epoch", type=int, default=0)
    parser.add_argument("--detach-epoch", type=int, default=12)
    parser.add_argument("--gt-noun-epoch", type=int, default=5)
    parser.add_argument("--hidden-size", type=int, default=1024)
    parser.add_argument("--lr-decrease", type=int, default=10)
    parser.add_argument("--second-lr-decrease", type=int, default=20)
    parser.add_argument("--iteration", type=float, default=100.0)
    parser.add_argument("--lr", type=float, default=.0006)
    parser.add_argument("--batch-size", type=int, default=16)

    # Keep the parsed namespace under its own name instead of rebinding
    # ``parser``; downstream helpers still receive the same object.
    opts = parser.parse_args(args)

    writer, log_dir = init_log_dir(opts)

    print('correct version')

    print("loading dev")
    with open('./SWiG_jsons/dev.json') as f:
        dev_gt = json.load(f)

    print("loading imsitu_dpace")
    with open('./SWiG_jsons/imsitu_space.json') as f:
        imsitu_space = json.load(f)  # was ``all``, which shadowed the builtin

    verb_orders = imsitu_space['verbs']
    noun_dict = imsitu_space['nouns']

    dataloader_train, dataset_train, dataloader_val, dataset_val = init_data(
        opts, verb_orders)

    print("loading model")
    retinanet = model_new.resnet50(num_classes=dataset_train.num_classes(),
                                   num_nouns=dataset_train.num_nouns(),
                                   parser=opts, pretrained=True)

    utils.load_net(opts.pretrained_model, [retinanet])
    print('Loading pretrained RetinaNet finished!')

    # Turn everything off, then re-enable only the sub-modules that are also
    # handed to the optimizer below.
    trainable_names = (
        'vocab_linear',
        'vocab_linear_2',
        'verb_embeding',
        'noun_embedding',
        'regressionModel',
        'classificationModel',
        'rnn',
        'rnn_linear',
    )
    utils.set_trainable(retinanet, False)
    for name in trainable_names:
        utils.set_trainable(getattr(retinanet, name), True)

    # One parameter group per sub-module, all sharing the default lr.
    optimizer = torch.optim.Adamax(
        [{'params': getattr(retinanet, name).parameters()}
         for name in trainable_names],
        lr=1e-3)

    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    retinanet = retinanet.cuda()

    print('weights loaded')

    best_eval = 0

    for epoch_num in range(opts.resume_epoch, opts.epochs):
        train(retinanet, optimizer, dataloader_train, opts, epoch_num, writer)

        print('Evaluating dataset')
        eval_avg = evaluate(retinanet, dataloader_val, opts, dataset_val,
                            dataset_train, verb_orders, dev_gt, epoch_num,
                            writer, noun_dict)

        if eval_avg > best_eval:
            # Record the new best score; previously it was never updated, so
            # every epoch scoring above 0 was reported as a new best.
            best_eval = eval_avg
            print('New best model at epoch ', epoch_num)

        scheduler.step()
def main():
    """Evaluate a pretrained verb module and role module on the dev split.

    Parses the command-line flags, builds the encoder from the training
    annotations and the role questions, builds the dev loader, loads the
    checkpoints given by ``--verb_module`` and ``--role_module`` into
    ``model.verb`` and ``model.roles``, runs ``eval`` on the dev split and
    prints the average of the top-1 ``verb``, ``value`` and ``value-all``
    scores.

    Raises:
        Exception: if ``--verb_module`` or ``--role_module`` is empty.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1,
                        help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument('--resume_training', action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='',
                        help='The model we resume')
    parser.add_argument('--verb_module', type=str, default='',
                        help='pretrained verb module')
    parser.add_argument('--role_module', type=str, default='',
                        help='pretrained role module')
    parser.add_argument('--train_role', action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--finetune_verb', action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn', action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir', type=str, default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--evaluate', action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--test', action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--dataset_folder', type=str, default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir', type=str, default='./resized_256',
                        help='Location of original images')
    parser.add_argument('--frcnn_feat_dir', type=str,
                        help='Location of output from detectron')

    # todo: train role module separately with gt verbs
    args = parser.parse_args()

    # Both checkpoints are loaded unconditionally below, so fail early with a
    # clear message instead of passing an empty path to utils.load_net.
    if not args.verb_module:
        raise Exception('[pretrained verb module] not specified')
    if not args.role_module:
        raise Exception('[pretrained role module] not specified')

    n_worker = 3

    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    print('model spec :, verb role with context ')

    def read_json(path):
        # Use a context manager so the file handle is closed right away.
        with open(path) as handle:
            return json.load(handle)

    # The training annotations are only needed to build the encoder.
    train_set = read_json(dataset_folder + "/train.json")
    imsitu_roleq = read_json("imsitu_data/imsitu_questions_prev.json")
    encoder = imsitu_encoder(train_set, imsitu_roleq)

    model = model_verbmlp_roletd_new.BaseModel(encoder, args.gpuid)

    # Only the dev loader is consumed by this entry point; the train, test
    # and second dev loaders that used to be built here were never read.
    dev_set = read_json(dataset_folder + "/dev.json")
    dev_set = imsitu_loader_roleq(imgset_folder, dev_set, encoder,
                                  model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=64,
                                             shuffle=True,
                                             num_workers=n_worker)

    utils.set_trainable(model, False)

    utils.load_net(args.verb_module, [model.verb])
    utils.load_net(args.role_module, [model.roles])

    # makedirs copes with nested paths and with an already existing directory.
    os.makedirs(args.output_dir, exist_ok=True)

    torch.manual_seed(1234)
    if args.gpuid >= 0:
        model.cuda()
        torch.cuda.manual_seed(1234)
        torch.backends.cudnn.deterministic = True

    top1, _, _ = eval(model, dev_loader, encoder, args.gpuid,
                      write_to_file=True)

    top1_avg = top1.get_average_results()

    avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"]
    avg_score /= 3

    print('Dev average :{:.2f} {} '.format(
        avg_score * 100, utils.format_dict(top1_avg, '{:.2f}', '1-')))
def main(args=None):
    """Train the full RetinaNet-based model and keep the best checkpoint.

    Parses ``args`` (``sys.argv`` when ``None``), loads the dev ground truth
    and the imSitu space JSON files, builds the data loaders and the model,
    enables training for the whole network, and alternates ``train`` and
    ``evaluate`` once per epoch. Whenever the evaluation score improves, the
    model and optimizer state dicts are saved to
    ``<output_dir>/<model_saving_name>.pth``.

    Args:
        args: optional list of command-line tokens passed to argparse.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument(
        '--train-file',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--classes-file',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--val-file',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--role-file', help='Path to role file')
    parser.add_argument('--epochs', help='Number of epochs', type=int,
                        default=50)
    parser.add_argument('--title', type=str, default='')
    parser.add_argument("--resume-epoch", type=int, default=0)
    parser.add_argument("--detach-epoch", type=int, default=12)
    parser.add_argument("--gt-noun-epoch", type=int, default=5)
    parser.add_argument("--hidden-size", type=int, default=1024)
    parser.add_argument("--lr-decrease", type=int, default=10)
    parser.add_argument("--second-lr-decrease", type=int, default=20)
    parser.add_argument("--iteration", type=float, default=100.0)
    parser.add_argument("--lr", type=float, default=.0006)
    parser.add_argument("--batch-size", type=int, default=16)
    parser.add_argument('--output_dir', type=str, default='./trained_models',
                        help='Location to output the model')
    # NOTE(review): there is no default, so omitting this flag saves the
    # checkpoint as "None.pth" -- consider making it required.
    parser.add_argument('--model_saving_name', type=str,
                        help='saving name of the outpul model')

    # Keep the parsed namespace under its own name instead of rebinding
    # ``parser``; downstream helpers still receive the same object.
    opts = parser.parse_args(args)

    writer, log_dir = init_log_dir(opts)

    # makedirs copes with nested paths and with an already existing directory.
    os.makedirs(opts.output_dir, exist_ok=True)

    print("loading dev")
    with open('./SWiG_jsons/dev.json') as f:
        dev_gt = json.load(f)

    print("loading imsitu_dpace")
    with open('./SWiG_jsons/imsitu_space.json') as f:
        imsitu_space = json.load(f)  # was ``all``, which shadowed the builtin

    verb_orders = imsitu_space['verbs']
    noun_dict = imsitu_space['nouns']

    dataloader_train, dataset_train, dataloader_val, dataset_val = init_data(
        opts, verb_orders)

    retinanet = model_setup7.resnet50(num_classes=dataset_train.num_classes(),
                                      num_nouns=dataset_train.num_nouns(),
                                      parser=opts, pretrained=True)

    utils.set_trainable(retinanet, True)

    # One parameter group per sub-module, all sharing the default lr.
    optimized_names = (
        'conv1',
        'bn1',
        'layer1',
        'layer2',
        'layer3',
        'layer4',
        'verb_embeding',
        'vrole_combo_embedding',
        'query_composer',
        'v_att',
        'q_net',
        'v_net',
        'gnn',
        'fpn',
        'regressionModel',
        'classificationModel',
        'gnn_linear',
        'noun_classifier_roi',
    )
    optimizer = torch.optim.Adamax(
        [{'params': getattr(retinanet, name).parameters()}
         for name in optimized_names],
        lr=5e-4)

    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    checkpoint_path = os.path.join(
        opts.output_dir, "{}.pth".format(opts.model_saving_name))
    best_eval = 0

    for epoch_num in range(opts.resume_epoch, opts.epochs):
        train(retinanet, optimizer, dataloader_train, opts, epoch_num, writer)

        print('Evaluating dataset')
        eval_avg = evaluate(retinanet, dataloader_val, opts, dataset_val,
                            dataset_train, verb_orders, dev_gt, epoch_num,
                            writer, noun_dict)

        if eval_avg > best_eval:
            best_eval = eval_avg
            # ``.module`` is the network wrapped by DataParallel above.
            torch.save(
                {
                    'state_dict': retinanet.module.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, checkpoint_path)
            print('New best model at epoch ', epoch_num)

        scheduler.step()
def main():
    """Command-line entry point: train the small finetune model or evaluate it.

    Parses the command-line flags, builds the encoder, the model and the
    train/dev loaders from the ``imSitu`` annotations, optionally loads
    pretrained weights for the selected mode, and builds the optimizer and a
    ``StepLR`` scheduler. With ``--evaluate`` it scores the dev split, prints
    the averaged metrics and writes ``gt_rn_only.csv``, ``pred_rn_only.csv``
    and ``verbpred_rn_only.csv``; otherwise it calls ``train(...)``.

    Raises:
        Exception: if the selected mode needs ``--verb_module`` or
            ``--resume_model`` and that path was not given.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1,
                        help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument('--resume_training', action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='',
                        help='The model we resume')
    parser.add_argument('--verb_module', type=str, default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role', action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--finetune_verb', action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn', action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir', type=str, default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--evaluate', action='store_true',
                        help='Only use the testing mode')

    # todo: train role module separately with gt verbs
    args = parser.parse_args()

    # Hyperparameters for the optimizer helper, the scheduler and train().
    lr = 0.0001
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'

    def read_json(path):
        # Use a context manager so the file handle is closed right away.
        with open(path) as handle:
            return json.load(handle)

    def make_loader(dataset, batch_size):
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=n_worker)

    train_set = read_json(dataset_folder + "/train.json")
    encoder = imsitu_encoder(train_set)

    model = model_vsrl_small_finetune.RelationNetworks(encoder, args.gpuid)

    # To group up the features
    cnn_features, verb_features, role_features = utils.group_features(model)

    train_set = imsitu_loader(imgset_folder, train_set, encoder,
                              model.train_preprocess())
    train_loader = make_loader(train_set, 32)

    # NOTE(review): both dev datasets are built with model.train_preprocess();
    # confirm that training-time preprocessing is intended for dev data.
    dev_set = read_json(dataset_folder + "/dev.json")
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder,
                            model.train_preprocess())
    dev_loader = make_loader(dev_set, 32)

    traindev_set = read_json(dataset_folder + "/dev.json")
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder,
                                 model.train_preprocess())
    traindev_loader = make_loader(traindev_set, 8)

    utils.set_trainable(model, False)

    def load_pretrained_verb():
        # Shared by the three modes that start from a pretrained verb module.
        if not args.verb_module:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])

    if args.train_role:
        print('CNN fix, Verb fix, train role from the scratch from: {}'.format(
            args.verb_module))
        args.train_all = False
        load_pretrained_verb()
        optimizer_select = 1
        model_name = 'cfx_vfx_rtrain'
    elif args.finetune_verb:
        print('CNN fix, Verb finetune, train role from the scratch from: {}'.
              format(args.verb_module))
        args.train_all = True
        load_pretrained_verb()
        optimizer_select = 2
        model_name = 'cfx_vft_rtrain'
    elif args.finetune_cnn:
        print(
            'CNN finetune, Verb finetune, train role from the scratch from: {}'
            .format(args.verb_module))
        args.train_all = True
        load_pretrained_verb()
        optimizer_select = 3
        model_name = 'cft_vft_rtrain'
    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if not args.resume_model:
            # The message now names the flag that is actually missing.
            raise Exception('[resume model] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        # Only the banner is skipped in evaluate mode; the settings below are
        # still needed because the optimizer is always built.
        if not args.evaluate:
            print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    optimizer = utils.get_optimizer(lr, weight_decay, optimizer_select,
                                    cnn_features, verb_features,
                                    role_features)

    # makedirs copes with nested paths and with an already existing directory.
    os.makedirs(args.output_dir, exist_ok=True)

    if args.gpuid >= 0:
        model.cuda()

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step,
                                                gamma=lr_gamma)

    if args.evaluate:
        top1, top5, _ = eval(model, dev_loader, encoder, args.gpuid,
                             write_to_file=True)

        top1_avg = top1.get_average_results()
        top5_avg = top5.get_average_results()

        # Average over exactly the metrics that are summed. The divisor used
        # to be a hard-coded 8 although only six values are added.
        metrics = [
            top1_avg["verb"],
            top1_avg["value"],
            top1_avg["value-all"],
            top5_avg["verb"],
            top5_avg["value"],
            top5_avg["value-all"],
        ]
        avg_score = sum(metrics) / len(metrics)

        print('Dev average :{:.2f} {} {}'.format(
            avg_score * 100, utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))

        # write results to csv file
        gt_labels = top1.gt_situation
        pred_labels = top1.predicted_situation
        verb_pred = top1.verb_pred

        # newline="" is what the csv module expects for files it writes;
        # without it, extra blank rows appear on platforms that translate
        # line endings.
        with open("gt_rn_only.csv", "w", newline="") as f:
            csv.writer(f).writerows(gt_labels)

        with open("pred_rn_only.csv", "w", newline="") as f:
            csv.writer(f).writerows(pred_labels)

        with open("verbpred_rn_only.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(['verb', 'total', 'predicted'])
            for key, value in verb_pred.items():
                writer.writerow([key, value[0], value[1]])

        print('Writing predictions to file completed !')
    else:
        print('Model training started!')
        train(model, train_loader, dev_loader, traindev_loader, optimizer,
              scheduler, n_epoch, args.output_dir, encoder, args.gpuid,
              clip_norm, lr_max, model_name, args)
### STOP EDITING HERE . # Save the list of classes for prediction mode later class_list = utils.get_subfolders(TRAIN_DIR) utils.save_class_list( OUT_DIR, class_list, model_name=args.model, dataset_name=os.path.basename(args.dataset), ) optim = eval(args.optimizer)(lr=args.lr) if args.continue_training is not None: finetune_model = load_model(args.continue_training) if args.transfer_strategy == "finetune": utils.set_trainable(finetune_model, True) else: finetune_model = utils.build_finetune_model( base_model, dropout=args.dropout, fc_layers=FC_LAYERS, num_classes=len(class_list), as_fixed_feature_extractor=True if args.transfer_strategy == "fixed" else False, skip_interval=args.skip_interval, ) finetune_model.compile(optim, loss="categorical_crossentropy", metrics=["accuracy"]) if args.summarize_model: finetune_model.summary()
def set_learnable(net):
    """Apply ``utils.set_trainable`` so that only three sub-modules are enabled.

    The whole ``net`` is first passed with ``False``; ``net.img_net``,
    ``net.imp_net`` and ``net.mask_net`` are then passed with ``True``, in
    that order.
    """
    utils.set_trainable(net, False)
    for attr_name in ("img_net", "imp_net", "mask_net"):
        utils.set_trainable(getattr(net, attr_name), True)
# Load the dictionary and build the 'train' and 'val' feature datasets.
dictionary = Dictionary.load_from_file('data/dictionary.pkl')
train_dset = VQAFeatureDataset_withmask('train', dictionary)
eval_dset = VQAFeatureDataset_withmask('val', dictionary)
batch_size = args.batch_size

# Build the baseline model first; it is passed into the main model's
# constructor below.
constructor = 'build_baseline0_newatt'
baseline = getattr(base_model, constructor)(train_dset, args.num_hid)

# The main model's builder function is looked up by name from args.model.
constructor = 'build_%s' % args.model
model = getattr(pretrained_tda_caq_model, constructor)(train_dset, args.num_hid, baseline).cuda()
model.w_emb.init_embedding('data/glove6b_init_300d.npy')

# Load the checkpoint from args.pretrained_tda_model into model.tda_model and
# pass that sub-module to set_trainable with False (presumably freezing it --
# confirm against utils).
utils.load_net(args.pretrained_tda_model, [model.tda_model], ['module'])
utils.set_trainable(model.tda_model, False)

model = nn.DataParallel(model).cuda()

#seventyfive = list(range(0, int(math.ceil(len(train_dset) * 0.75))))
#trainset_1 = torch.utils.data.Subset(train_dset, seventyfive)
train_loader = DataLoader(train_dset, batch_size, shuffle=True, num_workers=1)
eval_loader = DataLoader(eval_dset, batch_size, shuffle=True, num_workers=1)
train(model, train_loader, eval_loader, args.epochs, args.output)
def main(): import argparse parser = argparse.ArgumentParser( description="imsitu VSRL. Training, evaluation and prediction.") parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int) #parser.add_argument("--command", choices = ["train", "eval", "resume", 'predict'], required = True) parser.add_argument('--resume_training', action='store_true', help='Resume training from the model [resume_model]') parser.add_argument('--resume_model', type=str, default='', help='The model we resume') parser.add_argument('--verb_module', type=str, default='', help='pretrained verb module') parser.add_argument('--train_role', action='store_true', help='cnn fix, verb fix, role train from the scratch') parser.add_argument( '--finetune_verb', action='store_true', help='cnn fix, verb finetune, role train from the scratch') parser.add_argument( '--finetune_cnn', action='store_true', help='cnn finetune, verb finetune, role train from the scratch') parser.add_argument('--output_dir', type=str, default='./trained_models', help='Location to output the model') parser.add_argument('--evaluate', action='store_true', help='Only use the testing mode') parser.add_argument('--test', action='store_true', help='Only use the testing mode') parser.add_argument('--dataset_folder', type=str, default='./imSitu', help='Location of annotations') parser.add_argument('--imgset_dir', type=str, default='./resized_256', help='Location of original images') parser.add_argument('--frcnn_feat_dir', type=str, help='Location of output from detectron') parser.add_argument('--batch_size', type=int, default=64) #todo: train role module separately with gt verbs args = parser.parse_args() batch_size = args.batch_size #lr = 5e-6 lr = 0.0001 lr_max = 5e-4 lr_gamma = 0.1 lr_step = 25 clip_norm = 50 weight_decay = 1e-4 n_epoch = 500 n_worker = 3 #dataset_folder = 'imSitu' #imgset_folder = 'resized_256' dataset_folder = args.dataset_folder imgset_folder = args.imgset_dir train_set = 
json.load(open(dataset_folder + "/updated_train_new.json")) model = model_resnet_imgfeat_extractor.BaseModel() # To group up the features train_set = imsitu_loader_resnet_featextract(imgset_folder, train_set, model.train_preprocess()) train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=False, num_workers=n_worker) dev_set = json.load(open(dataset_folder + "/dev.json")) dev_set = imsitu_loader_resnet_featextract(imgset_folder, dev_set, model.dev_preprocess()) dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=batch_size, shuffle=False, num_workers=n_worker) test_set = json.load(open(dataset_folder + "/test.json")) test_set = imsitu_loader_resnet_featextract(imgset_folder, test_set, model.dev_preprocess()) test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=n_worker) utils.set_trainable(model, False) if args.resume_training: print('Resume training from: {}'.format(args.resume_model)) args.train_all = True if len(args.resume_model) == 0: raise Exception('[pretrained verb module] not specified') utils.load_net(args.resume_model, [model]) if args.gpuid >= 0: model.cuda() extract_features(model, 'train', train_loader, args.gpuid, len(train_loader) * batch_size) extract_features(model, 'val', dev_loader, args.gpuid, len(dev_loader) * batch_size) extract_features(model, 'test', test_loader, args.gpuid, len(test_loader) * batch_size) '''print('rechecking')