# Module setup: make the project root importable, define the data paths used
# throughout this script, and preload the cached BERT question features.
import os
import sys

# Must run BEFORE any project-local import below, otherwise those imports fail
# on a fresh interpreter.
# NOTE(review): machine-specific absolute path — consider deriving the project
# root from __file__ instead of hard-coding a user directory.
sys.path.append("C:\\Users\\minhm\\Documents\\GitHub\\vqa_idrid")

import pandas as pd

from vqa.models import sen2vec
import datasets.utils.print_utils as print_utils
import datasets.utils.io_utils as io_utils
import datasets.utils.paths_utils as path_utils

# Resolve all data paths relative to the project root so the constants work
# from any checkout location.
CURRENT_WORKING_DIR = os.path.realpath(__file__)
PROJECT_DIR = path_utils.get_project_dir(CURRENT_WORKING_DIR, "vqa_idrid")
RAW_DIR = PROJECT_DIR + "/data/vqa_med/raw/raw/"
PROCESSED_QA_PER_QUESTION_PATH = RAW_DIR + "med_qa_per_question.csv"
EXTRACTED_QUES_FEATURES_PATH = RAW_DIR + "question_features.pickle"
BASE_EXTRACTED_QUES_FEATURES_PATH = RAW_DIR + "question_features_base.pickle"

# Preload the pickled question-feature dictionaries (multilingual and
# bert-base variants); kept at module level because later code may rely on
# these being loaded at import time.
e = io_utils.read_pickle(EXTRACTED_QUES_FEATURES_PATH)
b = io_utils.read_pickle(BASE_EXTRACTED_QUES_FEATURES_PATH)
# (removed a leftover debug print("a"))
def main():
    """Train and/or evaluate a VQA model according to command-line options.

    Parses CLI arguments from the module-level ``parser``, merges them with a
    YAML options file, builds datasets/model/criterion/optimizer, then either
    evaluates (``--evaluate``) or runs the epoch training loop, checkpointing
    and dumping per-split results along the way.

    NOTE(review): depends on module-level names defined elsewhere in this
    file (``parser``, ``best_acc1``, ``yaml``, ``utils``, ``datasets``,
    ``models``, ``criterions``, ``engine``, ``logger``, ``torch``, ``click``,
    ``pprint``, ``load_checkpoint``, ``save_checkpoint``, ``save_results``,
    ``make_meters``) — confirm they are all in scope.
    """
    global args, best_acc1
    args = parser.parse_args()
    #########################################################################
    # Create options
    #########################################################################
    # Select the pickled question-feature file matching the BERT variant.
    if args.bert_model == "bert-base-uncased":
        question_features_path = BASE_EXTRACTED_QUES_FEATURES_PATH
    elif args.bert_model == "bert-base-multilingual-cased":
        # NOTE(review): CASED_EXTRACTED_QUES_FEATURES_PATH is not defined in
        # this chunk — verify it exists at module level.
        question_features_path = CASED_EXTRACTED_QUES_FEATURES_PATH
    else:
        question_features_path = EXTRACTED_QUES_FEATURES_PATH
    # CLI defaults; values found in the YAML options file override these.
    options = {
        'vqa': {
            'trainsplit': args.vqa_trainsplit
        },
        'logs': {
            'dir_logs': args.dir_logs
        },
        'model': {
            'arch': args.arch,
            'seq2vec': {
                'type': args.st_type,
                'dropout': args.st_dropout,
                'fixed_emb': args.st_fixed_emb
            }
        },
        'optim': {
            'lr': args.learning_rate,
            'batch_size': args.batch_size,
            'epochs': args.epochs
        }
    }
    if args.path_opt is not None:
        with open(args.path_opt, 'r') as handle:
            options_yaml = yaml.load(handle, Loader=yaml.FullLoader)
        options = utils.update_values(options, options_yaml)
    print('## args')
    pprint(vars(args))
    print('## options')
    pprint(options)
    if args.help_opt:
        return
    # Set datasets options
    if 'vgenome' not in options:
        options['vgenome'] = None
    #########################################################################
    # Create needed datasets
    #########################################################################
    trainset = datasets.factory_VQA(options['vqa']['trainsplit'],
                                    options['vqa'], options['coco'],
                                    options['vgenome'])
    train_loader = trainset.data_loader(
        batch_size=options['optim']['batch_size'],
        num_workers=args.workers,
        shuffle=True)
    # Val loader only exists for the 'train' split; test loader only for
    # 'trainval' or when evaluating.
    if options['vqa']['trainsplit'] == 'train':
        valset = datasets.factory_VQA('val', options['vqa'], options['coco'])
        val_loader = valset.data_loader(
            batch_size=options['optim']['batch_size'],
            num_workers=args.workers)
    if options['vqa']['trainsplit'] == 'trainval' or args.evaluate:
        testset = datasets.factory_VQA('test', options['vqa'],
                                       options['coco'])
        test_loader = testset.data_loader(
            batch_size=options['optim']['batch_size'],
            num_workers=args.workers)
    #########################################################################
    # Create model, criterion and optimizer
    #########################################################################
    model = models.factory(options['model'],
                           trainset.vocab_words(),
                           trainset.vocab_answers(),
                           cuda=True,
                           data_parallel=True)
    criterion = criterions.factory(options['vqa'], cuda=True)
    # Only optimize parameters that require gradients (frozen weights are
    # skipped by the filter).
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        options['optim']['lr'])
    #########################################################################
    # args.resume: resume from a checkpoint OR create logs directory
    #########################################################################
    exp_logger = None
    if args.resume:
        args.start_epoch, best_acc1, exp_logger = load_checkpoint(
            model.module, optimizer,
            os.path.join(options['logs']['dir_logs'], args.resume))
    else:
        # NOTE(review): when not resuming, ``best_acc1`` is never assigned
        # before the ``acc1 > best_acc1`` comparison in the training loop —
        # confirm a module-level ``best_acc1 = 0`` exists, otherwise this
        # path raises NameError on the first epoch.
        # Or create logs directory
        if os.path.isdir(options['logs']['dir_logs']):
            # NOTE(review): ``default=False`` is passed to str.format here,
            # not to click.confirm — the intended confirm default is likely
            # being silently ignored.
            if click.confirm(
                    'Logs directory already exists in {}. Erase?'.format(
                        options['logs']['dir_logs'], default=False)):
                os.system('rm -r ' + options['logs']['dir_logs'])
            else:
                return
        os.system('mkdir -p ' + options['logs']['dir_logs'])
        # Snapshot the effective options and args into the logs directory so
        # the run is reproducible.
        path_new_opt = os.path.join(options['logs']['dir_logs'],
                                    os.path.basename(args.path_opt))
        path_args = os.path.join(options['logs']['dir_logs'], 'args.yaml')
        with open(path_new_opt, 'w') as f:
            yaml.dump(options, f, default_flow_style=False)
        with open(path_args, 'w') as f:
            yaml.dump(vars(args), f, default_flow_style=False)
    if exp_logger is None:
        # Set loggers
        exp_name = os.path.basename(
            options['logs']['dir_logs'])  # add timestamp
        exp_logger = logger.Experiment(exp_name, options)
        exp_logger.add_meters('train', make_meters())
        exp_logger.add_meters('test', make_meters())
        if options['vqa']['trainsplit'] == 'train':
            exp_logger.add_meters('val', make_meters())
        exp_logger.info['model_params'] = utils.params_count(model)
        print('Model has {} parameters'.format(
            exp_logger.info['model_params']))
    #########################################################################
    # args.evaluate: on valset OR/AND on testset
    #########################################################################
    if args.evaluate:
        path_logger_json = os.path.join(options['logs']['dir_logs'],
                                        'logger.json')
        if options['vqa']['trainsplit'] == 'train':
            acc1, val_results = engine.validate(val_loader, model, criterion,
                                                exp_logger, args.start_epoch,
                                                args.print_freq)
            # save results and compute OpenEnd accuracy
            exp_logger.to_json(path_logger_json)
            save_results(val_results, args.start_epoch, valset.split_name(),
                         options['logs']['dir_logs'], options['vqa']['dir'])
        test_results, testdev_results = engine.test(test_loader, model,
                                                    exp_logger,
                                                    args.start_epoch,
                                                    args.print_freq)
        # save results and DOES NOT compute OpenEnd accuracy
        exp_logger.to_json(path_logger_json)
        save_results(test_results, args.start_epoch, testset.split_name(),
                     options['logs']['dir_logs'], options['vqa']['dir'])
        save_results(testdev_results, args.start_epoch,
                     testset.split_name(testdev=True),
                     options['logs']['dir_logs'], options['vqa']['dir'])
        return
    #########################################################################
    # Begin training on train/val or trainval/test
    #########################################################################
    for epoch in range(args.start_epoch + 1, options['optim']['epochs']):
        # if epoch > 1 and gen_utils.str2bool(args.is_augment_image) and 'options/med/' in args.path_opt:
        #     cmd = "python main/extract.py --dir_data data/raw/vqa_med/preprocessed --dataset med --is_augment_image 1 -b 64"
        #     os.system(cmd)
        # if epoch == 1 and 'options/med/' in args.path_opt:
        #     cmd = "python main/extract.py --dir_data data/raw/vqa_med/preprocessed --dataset med --is_augment_image 0 -b 64"
        #     os.system(cmd)
        # train for one epoch
        # NOTE(review): ``dict=`` shadows the builtin name; it is the keyword
        # expected by engine.train — the pre-extracted BERT question
        # features, re-read from disk every epoch.
        engine.train(train_loader, model, criterion, optimizer, exp_logger,
                     epoch, args.print_freq,
                     dict=io_utils.read_pickle(question_features_path),
                     bert_dim=options["model"]["dim_q"])
        if options['vqa']['trainsplit'] == 'train':
            # evaluate on validation set
            acc1, val_results = engine.validate(
                val_loader, model, criterion, exp_logger, epoch,
                args.print_freq, topk=5,
                dict=io_utils.read_pickle(question_features_path),
                bert_dim=options["model"]["dim_q"])
            # remember best prec@1 and save checkpoint
            is_best = acc1 > best_acc1
            best_acc1 = max(acc1, best_acc1)
            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': options['model']['arch'],
                    'best_acc1': best_acc1,
                    'exp_logger': exp_logger
                }, model.module.state_dict(), optimizer.state_dict(),
                options['logs']['dir_logs'], args.save_model,
                args.save_all_from, is_best)
            # save results and compute OpenEnd accuracy
            save_results(val_results, epoch, valset.split_name(),
                         options['logs']['dir_logs'], options['vqa']['dir'])
        else:
            test_results, testdev_results = engine.test(
                test_loader, model, exp_logger, epoch, args.print_freq,
                topk=5, dict=io_utils.read_pickle(question_features_path),
                bert_dim=options["model"]["dim_q"])
            # save checkpoint at every timestep
            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': options['model']['arch'],
                    'best_acc1': best_acc1,
                    'exp_logger': exp_logger
                }, model.module.state_dict(), optimizer.state_dict(),
                options['logs']['dir_logs'], args.save_model,
                args.save_all_from)
            # save results and DOES NOT compute OpenEnd accuracy
            save_results(test_results, epoch, testset.split_name(),
                         options['logs']['dir_logs'], options['vqa']['dir'])
            save_results(testdev_results, epoch,
                         testset.split_name(testdev=True),
                         options['logs']['dir_logs'], options['vqa']['dir'])
def compute_prob_one_model(model_name, vqa_trainsplit="train"):
    """Restore one trained model and return its predicted probabilities.

    Rebuilds the options/datasets/model for ``model_name``, resumes from the
    checkpoint named by ``--resume`` (default ``'best'``), and runs a single
    evaluation pass that returns per-answer probabilities (used for
    ensembling several models).

    Args:
        model_name: log-directory name of the model; substrings such as
            ``"globalbilinear"``, ``"_cased"``, ``"_uncased"`` and ``"768"``
            select the YAML options file and the BERT variant.
        vqa_trainsplit: ``"train"`` to evaluate on the val split, anything
            else to evaluate on the test split.

    Returns:
        ``(prob, val_loader)`` when ``vqa_trainsplit == "train"``, otherwise
        ``(prob, test_loader)``.

    NOTE(review): largely duplicates ``main()``'s setup, and calls
    ``parser.parse_args()`` on the process-wide ``sys.argv`` — unrelated CLI
    flags passed to the host script will leak into this function.
    """
    parser = argparse.ArgumentParser(
        description='Train/Evaluate models',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ##################################################
    # yaml options file contains all default choices #
    # parser.add_argument('--path_opt', default='options/breast/default.yaml', type=str,
    #                     help='path to a yaml options file')
    parser.add_argument(
        '--path_opt',
        default='options/med/bilinear_att_train_imagenet_h200_g4.yaml',
        type=str,
        help='path to a yaml options file')
    ################################################
    # change cli options to modify default choices #
    # logs options
    parser.add_argument(
        '--dir_logs',
        default='logs/med/train/globalbilinear_att_train_imagenet_h200_g4',
        type=str,
        help='dir logs')
    # data options
    parser.add_argument('--vqa_trainsplit',
                        type=str,
                        choices=['train', 'trainval'],
                        default=vqa_trainsplit)
    # model options
    parser.add_argument('--arch',
                        choices=models.model_names,
                        help='vqa model architecture: ' +
                        ' | '.join(models.model_names))
    parser.add_argument('--st_type', help='skipthoughts type')
    parser.add_argument('--st_dropout', type=float)
    parser.add_argument('--st_fixed_emb',
                        default=None,
                        type=utils.str2bool,
                        help='backprop on embedding')
    # bert options
    parser.add_argument(
        '--bert_model',
        default="bert-base-multilingual-uncased",
        help=
        'bert model: bert-base-uncased | bert-base-multilingual-uncased | bert-base-multilingual-cased'
    )
    # image options
    parser.add_argument(
        '--is_augment_image',
        default='1',
        help='whether to augment images at the beginning of every epoch?')
    # optim options
    parser.add_argument('-lr', '--learning_rate',
                        type=float,
                        help='initial learning rate')
    parser.add_argument('-b', '--batch_size',
                        type=int,
                        help='mini-batch size')
    parser.add_argument('--epochs',
                        type=int,
                        help='number of total epochs to run')
    # options not in yaml file
    parser.add_argument('--start_epoch',
                        default=0,
                        type=int,
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('--resume',
                        default='best',
                        type=str,
                        help='path to latest checkpoint')
    parser.add_argument('--save_model',
                        default=True,
                        type=utils.str2bool,
                        help='able or disable save model and optim state')
    # NOTE(review): the help text below ends with a stray "')" — looks like a
    # typo in the string literal (left unchanged here).
    parser.add_argument(
        '--save_all_from',
        type=int,
        help='''delete the preceding checkpoint until an epoch,'''
        ''' then keep all (useful to save disk space)')''')
    # NOTE(review): store_true combined with default=True means --evaluate is
    # always True and the flag cannot switch it off — confirm this is
    # intentional for this "compute probabilities" entry point.
    parser.add_argument('-e', '--evaluate',
                        dest='evaluate',
                        action='store_true',
                        help='evaluate model on validation and test set',
                        default=True)
    parser.add_argument('-j', '--workers',
                        default=0,
                        type=int,
                        help='number of data loading workers')
    parser.add_argument('--print_freq', '-p',
                        default=10,
                        type=int,
                        help='print frequency')
    ################################################
    parser.add_argument('-ho', '--help_opt',
                        dest='help_opt',
                        action='store_true',
                        help='show selected options before running')
    args = parser.parse_args()
    # Point the logs directory at this specific model/split.
    if vqa_trainsplit == "train":
        args.dir_logs = "logs/med/train/{}".format(model_name)
    else:
        args.dir_logs = "logs/med/trainval/{}".format(model_name)
    # Derive the YAML options filename from the model name: strip the
    # "globalbilinear"/BERT-variant decorations that do not appear in the
    # options file names.
    if "globalbilinear" in model_name:
        path_opt = model_name.replace("globalbilinear", "bilinear")
        if "_cased" in path_opt:
            path_opt = path_opt.replace("_cased", "")
        if "_uncased" in path_opt:
            path_opt = path_opt.replace("_uncased", "")
    elif "_cased_768" in model_name:
        path_opt = model_name.replace("_cased_768", "_768")
    elif "_uncased_768" in model_name:
        path_opt = model_name.replace("_uncased_768", "_768")
    elif "_cased" in model_name and "768" not in model_name:
        path_opt = model_name.replace("_cased", "")
    elif "_uncased" in model_name and "768" not in model_name:
        path_opt = model_name.replace("_uncased", "")
    else:
        path_opt = model_name
    # Options files are shared between train and trainval runs.
    path_opt = path_opt.replace("_trainval_", "_train_")
    args.path_opt = "{}/{}.yaml".format(args.dir_logs, path_opt)
    #########################################################################
    # Create options
    #########################################################################
    # Infer the BERT variant from the model name, then pick the matching
    # pickled question-feature file.
    if "_cased" in model_name:
        args.bert_model = "bert-base-multilingual-cased"
    elif "_uncased" in model_name:
        args.bert_model = "bert-base-multilingual-uncased"
    if args.bert_model == "bert-base-uncased":
        question_features_path = BASE_EXTRACTED_QUES_FEATURES_PATH
    elif args.bert_model == "bert-base-multilingual-cased":
        # NOTE(review): CASED_EXTRACTED_QUES_FEATURES_PATH is not defined in
        # this chunk — verify it exists at module level.
        question_features_path = CASED_EXTRACTED_QUES_FEATURES_PATH
    else:
        question_features_path = EXTRACTED_QUES_FEATURES_PATH
    # CLI defaults; values found in the YAML options file override these.
    options = {
        'vqa': {
            'trainsplit': args.vqa_trainsplit
        },
        'logs': {
            'dir_logs': args.dir_logs
        },
        'model': {
            'arch': args.arch,
            'seq2vec': {
                'type': args.st_type,
                'dropout': args.st_dropout,
                'fixed_emb': args.st_fixed_emb
            }
        },
        'optim': {
            'lr': args.learning_rate,
            'batch_size': args.batch_size,
            'epochs': args.epochs
        }
    }
    if args.path_opt is not None:
        with open(args.path_opt, 'r') as handle:
            options_yaml = yaml.load(handle, Loader=yaml.FullLoader)
        options = utils.update_values(options, options_yaml)
    print('## args')
    pprint(vars(args))
    print('## options')
    pprint(options)
    if args.help_opt:
        return
    # Set datasets options
    if 'vgenome' not in options:
        options['vgenome'] = None
    #########################################################################
    # Create needed datasets
    #########################################################################
    trainset = datasets.factory_VQA(options['vqa']['trainsplit'],
                                    options['vqa'], options['coco'],
                                    options['vgenome'])
    train_loader = trainset.data_loader(
        batch_size=options['optim']['batch_size'],
        num_workers=args.workers,
        shuffle=True)
    if options['vqa']['trainsplit'] == 'train':
        valset = datasets.factory_VQA('val', options['vqa'], options['coco'])
        val_loader = valset.data_loader(
            batch_size=options['optim']['batch_size'],
            num_workers=args.workers)
    if options['vqa']['trainsplit'] == 'trainval' or args.evaluate:
        testset = datasets.factory_VQA('test', options['vqa'],
                                       options['coco'])
        test_loader = testset.data_loader(
            batch_size=options['optim']['batch_size'],
            num_workers=args.workers)
    #########################################################################
    # Create model, criterion and optimizer
    #########################################################################
    model = models.factory(options['model'],
                           trainset.vocab_words(),
                           trainset.vocab_answers(),
                           cuda=True,
                           data_parallel=True)
    criterion = criterions.factory(options['vqa'], cuda=True)
    # Only optimize parameters that require gradients.
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        options['optim']['lr'])
    #########################################################################
    # args.resume: resume from a checkpoint OR create logs directory
    #########################################################################
    # ``--resume`` defaults to 'best', so the checkpoint-loading branch is
    # taken unless the caller explicitly clears it.
    exp_logger = None
    if args.resume:
        args.start_epoch, best_acc1, exp_logger = load_checkpoint(
            model.module, optimizer,
            os.path.join(options['logs']['dir_logs'], args.resume))
    else:
        # Or create logs directory
        if os.path.isdir(options['logs']['dir_logs']):
            # NOTE(review): ``default=False`` is passed to str.format here,
            # not to click.confirm — same issue as in main().
            if click.confirm(
                    'Logs directory already exists in {}. Erase?'.format(
                        options['logs']['dir_logs'], default=False)):
                os.system('rm -r ' + options['logs']['dir_logs'])
            else:
                return
        os.system('mkdir -p ' + options['logs']['dir_logs'])
        # Snapshot the effective options and args into the logs directory.
        path_new_opt = os.path.join(options['logs']['dir_logs'],
                                    os.path.basename(args.path_opt))
        path_args = os.path.join(options['logs']['dir_logs'], 'args.yaml')
        with open(path_new_opt, 'w') as f:
            yaml.dump(options, f, default_flow_style=False)
        with open(path_args, 'w') as f:
            yaml.dump(vars(args), f, default_flow_style=False)
    if exp_logger is None:
        # Set loggers
        exp_name = os.path.basename(
            options['logs']['dir_logs'])  # add timestamp
        exp_logger = logger.Experiment(exp_name, options)
        exp_logger.add_meters('train', make_meters())
        exp_logger.add_meters('test', make_meters())
        if options['vqa']['trainsplit'] == 'train':
            exp_logger.add_meters('val', make_meters())
        exp_logger.info['model_params'] = utils.params_count(model)
        print('Model has {} parameters'.format(
            exp_logger.info['model_params']))
    #########################################################################
    # args.evaluate: on valset OR/AND on testset
    #########################################################################
    if args.evaluate:
        path_logger_json = os.path.join(options['logs']['dir_logs'],
                                        'logger.json')
        if options['vqa']['trainsplit'] == 'train':
            # is_return_prob=True makes validate also return the raw answer
            # probabilities needed for ensembling.
            acc1, val_results, prob = engine.validate(
                val_loader, model, criterion, exp_logger, args.start_epoch,
                args.print_freq,
                dict=io_utils.read_pickle(question_features_path),
                bert_dim=options["model"]["dim_q"],
                is_return_prob=True)
        else:
            # NOTE(review): epoch is hard-coded to 1 here (main() passes
            # args.start_epoch) — confirm this is intentional.
            test_results, testdev_results, prob = engine.test(
                test_loader, model, exp_logger, 1, args.print_freq,
                dict=io_utils.read_pickle(question_features_path),
                bert_dim=options["model"]["dim_q"],
                is_return_prob=True)
    # Free GPU memory before the caller loads the next model of the ensemble.
    torch.cuda.empty_cache()
    if vqa_trainsplit == "train":
        return prob, val_loader
    else:
        return prob, test_loader