def train(num_epochs, exp_dir, eval_interval, learning_rate, batch_size):
    train_dataset = VQADataset(split='train')
    test_dataset = VQADataset(split='val')
    train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                   num_workers=3, drop_last=True)
    test_loader = data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=2)
    criterion = nn.CrossEntropyLoss()
    total_steps = len(train_loader)
    model = Classifier(vocab_size=train_dataset.get_embedding_dim(), embedding_dim=300,
                       hidden_dim=2048, dim_input=2048, dim_output=2048,
                       top_ans=3000).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    iter = 0
    for epoch in range(num_epochs):
        for i, (images, questions, answers, q_ids, lengths) in enumerate(train_loader):
            images = images.to(device)
            questions = questions.to(device)
            answers = answers.to(device)

            model.train()
            output = model(images, questions, lengths)
            loss = criterion(output, answers)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % 50 == 0:
                curr_iter = epoch * len(train_loader) + i
                print('Epoch [{}/{}], Step [{}/{}], Batch Loss: {:.4f}'.format(
                    epoch + 1, num_epochs, i + 1, total_steps, loss.item()))
                sys.stdout.flush()

            # Do some evaluations
            if iter > 0 and iter % eval_interval == 0:
                print('Evaluating at iter {}:'.format(iter))
                curr_acc = evaluate(model, exp_dir, test_loader,
                                    train_dataset.inverse_top_answers, iter)
                print('Epoch [{}/{}] Approx. training accuracy: {}'.format(
                    epoch + 1, num_epochs, curr_acc))
                if not os.path.exists('models'):
                    os.mkdir('models')
                if not os.path.exists('models/{}'.format(exp_dir)):
                    os.mkdir('models/{}'.format(exp_dir))
                torch.save(model.state_dict(),
                           'models/{}/model_iter_{}.bin'.format(exp_dir, iter))
                torch.save(optimizer.state_dict(),
                           'models/{}/optimizer_iter_{}.bin'.format(exp_dir, iter))
            iter += 1
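# Illustrative only (not part of the original script): a minimal sketch of how the
# train() function above might be invoked. The hyperparameter values and the
# experiment directory name are placeholders, not values taken from the original project.
if __name__ == '__main__':
    train(num_epochs=10,
          exp_dir='baseline_run',    # checkpoints are written to models/baseline_run/
          eval_interval=1000,        # run evaluate() every 1000 training iterations
          learning_rate=1e-3,
          batch_size=64)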
def evaluate_hw2(cfg: DictConfig) -> float:
    main_utils.init(cfg)
    load_v2()

    # Load dataset
    path_image_train = '/datashare/train2014/COCO_train2014_'
    path_question_train = '/datashare/v2_OpenEnded_mscoco_train2014_questions.json'
    train_dataset = VQADataset(path_answers=cfg['main']['paths']['train'],
                               path_image=path_image_train,
                               path_questions=path_question_train)

    path_image_val = '/datashare/val2014/COCO_val2014_'
    path_question_val = '/datashare/v2_OpenEnded_mscoco_val2014_questions.json'
    val_dataset = VQADataset(path_answers=cfg['main']['paths']['validation'],
                             path_image=path_image_val,
                             path_questions=path_question_val,
                             word_dict=train_dataset.word_dict)

    eval_loader = DataLoader(val_dataset, cfg['train']['batch_size'], shuffle=True,
                             num_workers=cfg['main']['num_workers'])

    image_dim = train_dataset.pic_size
    output_dim = 2410
    model = VQAModel(batch_size=cfg['train']['batch_size'],
                     word_vocab_size=train_dataset.vocab_size,
                     lstm_hidden=cfg['train']['num_hid'],
                     output_dim=output_dim,
                     dropout=cfg['train']['dropout'],
                     word_embedding_dim=cfg['train']['word_embedding_dim'],
                     question_output_dim=cfg['train']['question_output_dim'],
                     image_dim=image_dim,
                     last_hidden_fc_dim=cfg['train']['last_hidden_fc_dim'])
    if torch.cuda.is_available():
        model = model.cuda()
    model.load_state_dict(
        torch.load('model.pkl', map_location=lambda storage, loc: storage)['model_state'])
    model.train(False)

    eval_score, eval_loss = evaluate(model, eval_loader)
    print(f"The evaluation score is {eval_score}")
    return eval_score
def main():
    global args
    args = parser.parse_args()
    args_str = json.dumps(vars(args), indent=2)
    print('[Info] called with: ' + args_str)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # checkpoint directory
    cfg.LOG_DIR = os.path.join(cfg.LOG_DIR, args.checkpoint)

    # select device
    torch.cuda.set_device(args.gpu_id)
    print('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # data
    print('[Info] init dataset')
    model_group_name, model_name = args.model.split('/')
    val_set = VQADataset('test', model_group_name)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=args.bs, shuffle=False,
                                             num_workers=args.workers, pin_memory=True)
    print('sample count: {}'.format(len(val_set)))

    # model
    print('[Info] construct model')
    model_group = import_module('models.' + model_group_name)
    model = getattr(model_group, model_name)(num_words=val_set.num_words,
                                             num_ans=val_set.num_ans,
                                             emb_size=get_emb_size())
    model.cuda()
    cudnn.benchmark = True
    print('[Info] model name: ' + args.model)

    # predict
    fnames = [(i, 'checkpoint-{:03}.pth.tar'.format(i))
              for i in range(args.start_epoch, args.end_epoch, args.epoch_freq)]
    cp_files = [(i, os.path.join(cfg.LOG_DIR, fname)) for i, fname in fnames]
    for epoch, cp_file in cp_files:
        if os.path.isfile(cp_file):
            print("[Info] loading checkpoint '{}'".format(cp_file))
            checkpoint = torch.load(cp_file)
            model.load_state_dict(checkpoint['state_dict'])
        else:
            print("[Info] no checkpoint found at '{}'".format(cp_file))
            continue
        results = predict(val_loader, model)
        result_file = os.path.join(cfg.LOG_DIR, 'result-{:03}.json'.format(epoch))
        json.dump(results, open(result_file, 'w'))
def load_datasets(config, phases):
    config = config['data']
    # If preprocessing is not requested, the block below is skipped
    if 'preprocess' in config and config['preprocess']:
        print('Preprocessing datasets')
        # Preprocess the training and validation data
        preprocess(data_dir=config['dir'],
                   train_ques_file=config['train']['ques'],
                   train_ans_file=config['train']['ans'],
                   val_ques_file=config['val']['ques'],
                   val_ans_file=config['val']['ans'])

    # Preprocessing done; load the data
    print('Loading preprocessed datasets')
    datafiles = {x: '{}.pkl'.format(x)
                 for x in phases}  # e.g. {'train': 'train.pkl', 'val': 'val.pkl'}
    raw_images = 'preprocess' in config['images'] and config['images']['preprocess']
    if raw_images:
        img_dir = {x: config[x]['img_dir'] for x in phases}
    else:
        img_dir = {x: config[x]['emb_dir'] for x in phases}
    datasets = {
        x: VQADataset(data_dir=config['dir'], qafile=datafiles[x], img_dir=img_dir[x],
                      phase=x, img_scale=config['images']['scale'],
                      img_crop=config['images']['crop'], raw_images=raw_images)
        for x in phases
    }
    print(datasets['train'][0])
    print(datasets['train'][0])
    batch_samplers = {
        x: VQABatchSampler(datasets[x], config[x]['batch_size'])
        for x in phases
    }

    # Directly uses the DataLoader API
    dataloaders = {
        x: DataLoader(datasets[x], batch_sampler=batch_samplers[x],
                      num_workers=config['loader']['workers'])
        for x in phases
    }
    dataset_sizes = {x: len(datasets[x]) for x in phases}
    print("ques vocab size: {}".format(len(VQADataset.ques_vocab)))
    print("ans vocab size: {}".format(len(VQADataset.ans_vocab)))
    return dataloaders, VQADataset.ques_vocab, VQADataset.ans_vocab
def _sample(indices, config):
    dataset = VQADataset(config.dataset_dir, output_shape=[256, 256], train=False)
    if config.action == "stage1":
        stage1_generator = Generator()
        _load(stage1_generator, os.path.join(config.model_dir, "stage1"))
    else:
        stage1_generator = Stage1Generator()
        stage2_generator = Stage2Generator()
        _load(stage1_generator, os.path.join(config.model_dir, "stage1"))
        _load(stage2_generator, os.path.join(config.model_dir, "stage2"))

    ims, embeds, captions = [], [], []
    for idx in indices:
        im, embed, caption = dataset[idx]
        ims.append(im)
        embeds.append(embed)
        captions.append(caption)
    ims = torch.stack(ims, 0)
    embeds = torch.stack(embeds, 0)

    noise = Variable(torch.randn(len(indices), 100))
    if config.cuda:
        noise = noise.cuda()
        embeds = Variable(embeds).cuda()
    else:
        embeds = Variable(embeds)
    embeds = embeds.view(len(indices), -1)

    fake_ims_stage1 = stage1_generator(noise, embeds)
    torchvision.utils.save_image(ims, "{}/real.png".format(config.sample_dir),
                                 normalize=True)
    torchvision.utils.save_image(fake_ims_stage1.data,
                                 "{}/fake_stage1.png".format(config.sample_dir),
                                 normalize=True)
    if config.action == "stage2":
        fake_ims_stage2 = stage2_generator(fake_ims_stage1, embeds)
        torchvision.utils.save_image(fake_ims_stage2.data,
                                     "{}/fake_stage2.png".format(config.sample_dir),
                                     normalize=True)

    _file = open("{}/captions.txt".format(config.sample_dir), "w")
    for i, caption in enumerate(captions):
        _file.write("index: {}\n".format(indices[i]))
        for c in caption:
            _file.write(c + "\n")
        _file.write("\n")
    _file.close()
def load_datasets(config, phases):
    datasets = {
        x: VQADataset(mode=x, preprocess=config['data']['preprocess'])
        for x in phases
    }
    batch_samplers = {
        x: VQABatchSampler(datasets[x], config['data']['batch_size'])
        for x in phases
    }
    num_workers = config['data']['num_workers']
    dataloaders = {
        x: DataLoader(datasets[x], batch_sampler=batch_samplers[x],
                      num_workers=num_workers)
        for x in phases
    }
    print("dataset size", {x: len(datasets[x]) for x in phases})
    print("ques vocab size: {}".format(len(VQADataset.ques_vocab)))
    print("ans vocab size: {}".format(len(VQADataset.ans_vocab)))
    return dataloaders, VQADataset.ques_vocab, VQADataset.ans_vocab
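# Illustrative only (not part of the original code): a minimal sketch of how the
# load_datasets() helper above might be called. The config keys mirror the lookups made
# inside the function; the concrete values and the phase names are placeholder assumptions.
def _example_load_datasets_usage():
    config = {'data': {'preprocess': False, 'batch_size': 64, 'num_workers': 4}}
    dataloaders, ques_vocab, ans_vocab = load_datasets(config, phases=['train', 'val'])
    print(len(ques_vocab), len(ans_vocab))
    # Each batch is whatever VQADataset.__getitem__ yields, grouped by VQABatchSampler.
    for batch in dataloaders['train']:
        break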
def main():
    global args
    args = parser.parse_args()
    args_str = json.dumps(vars(args), indent=2)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # use timestamp as log subdirectory
    timestamp = args.timestamp
    cfg.LOG_DIR = os.path.join(cfg.LOG_DIR, timestamp)
    model_group_name, model_name = args.model.split('/')

    # setting log handlers
    sh = logging.StreamHandler(sys.stdout)
    sh.setLevel(logging.DEBUG)
    logger.addHandler(sh)
    logger.debug('[Info] called with: ' + args_str)
    logger.debug('[Info] timestamp: ' + timestamp)

    # select device
    torch.cuda.set_device(args.gpu_id)
    logger.debug('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # data
    assert len(cfg.TEST.SPLITS) == 1 and cfg.TEST.SPLITS[0] in ('val2014',)
    logger.debug('[Info] init dataset')
    val_set = VQADataset('test', model_group_name)
    RES_DIR = '/home/lyt/code/bert-as-service-test/result'
    queIds, queFea, _ = load_data(split_name='val2014', RES_DIR=RES_DIR)
    assert queIds.tolist() == val_set.que_id.tolist()
    logger.debug('[Info] Clustering using {}, {} clusters'.format(
        args.cluster_alg, args.n_clusters))
    clusfilename = '{}/{}/{}_{}_n{}.pkl'.format(RES_DIR, 'v2', 'train2014',
                                                args.cluster_alg, args.n_clusters)
    logger.debug('[Info] cluster file: {}'.format(clusfilename))
    val_qTypeLabels = clustering(queFea, clu_num=args.n_clusters,
                                 clu_alg=args.cluster_alg, savefname=clusfilename)

    # model
    logger.debug('[Info] construct model')
    model_group = import_module('models.' + model_group_name)
    model = getattr(model_group, model_name)(num_words=val_set.num_words,
                                             num_ans=val_set.num_ans,
                                             emb_size=get_emb_size())
    logger.debug('[Info] model name: ' + args.model)
    total_param = 0
    for param in model.parameters():
        total_param += param.nelement()
    logger.debug('[Info] total parameters: {}M'.format(math.ceil(total_param / 2**20)))
    model.cuda()
    cudnn.benchmark = True

    # load best model, predict
    logger.debug('[Info] load model ...')
    best_path = os.path.join(cfg.LOG_DIR, 'model-best.pth.tar')
    # if os.path.isfile(best_path):
    assert os.path.isfile(best_path)
    logger.debug("[Info] loading checkpoint '{}'".format(best_path))
    cp_state = torch.load(best_path)
    best_acc = cp_state['best_acc']
    logger.debug('[Info] best model with best acc {}'.format(best_acc))
    model.load_state_dict(cp_state['state_dict'])
    # else:
    #     logger.debug("[Info] no checkpoint found at '{}'".format(best_path))

    for i in range(args.n_clusters):
        logger.debug('[Info] choose cluster ID: {}'.format(i))
        # sel = val_qTypeLabels == args.clus_id
        sel = val_qTypeLabels == i
        val_quesIds = queIds[sel].tolist()
        logger.debug('[Info] #Val set before/after clustering and choosing {}/{}'.format(
            queIds.shape[0], len(val_quesIds)))
        val_set_sub = select_subset(val_set, sel)
        val_loader = torch.utils.data.DataLoader(val_set_sub, batch_size=args.bs,
                                                 shuffle=False, num_workers=args.workers,
                                                 pin_memory=True)
        logger.debug('sample count: {}'.format(len(val_set_sub)))
        acc = validate(val_loader, model, None, None, quesIds=val_quesIds)
        logger.debug('Evaluate Result:\tAcc {0}'.format(acc))
def main():
    global args
    args = parser.parse_args()
    args_str = json.dumps(vars(args), indent=2)

    # use timestamp as log subdirectory
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    cfg.LOG_DIR = os.path.join(cfg.LOG_DIR, timestamp)
    os.mkdir(cfg.LOG_DIR)
    json.dump(cfg, open(cfg.LOG_DIR + '/config.json', 'w'), indent=2)
    model_group_name, model_name = args.model.split('/')
    shutil.copy('models/' + model_group_name + '.py', cfg.LOG_DIR)

    # init ploter
    ploter = Ploter(timestamp)

    # setting log handlers
    fh = logging.FileHandler(os.path.join(cfg.LOG_DIR, 'log'))
    fh.setLevel(logging.DEBUG)
    fhc = logging.FileHandler('current.log')
    fhc.setLevel(logging.DEBUG)
    sh = logging.StreamHandler(sys.stdout)
    sh.setLevel(logging.DEBUG)
    fmt = '[%(asctime)-15s] %(message)s'
    datefmt = '%Y-%m-%d %H:%M:%S'
    formatter = logging.Formatter(fmt, datefmt)
    fh.setFormatter(formatter)
    fhc.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(fhc)
    logger.addHandler(sh)
    logger.debug('[Info] called with: ' + args_str)
    logger.debug('[Info] timestamp: ' + timestamp)
    logger.debug('[Info] CPU random seed: {}'.format(torch.initial_seed()))
    logger.debug('[Info] GPU random seed: {}'.format(torch.cuda.initial_seed()))

    # select device
    torch.cuda.set_device(args.gpu_id)
    logger.debug('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # display some information
    train_pattern = '[Info] Training pattern: {}\n'\
                    '\t[train_subset by atype, no finetune, train_all_ft_atype]'
    if args.ft_epoch == 0:
        logger.debug(train_pattern.format('train_subset by atype'))
    elif args.ft_epoch > args.epochs:
        logger.debug(train_pattern.format('no finetune'))
    else:  # 0 < ft_epoch < epochs
        logger.debug(train_pattern.format('train_all_ft_atype'))
    resume_train = '[Info] Resume train: {}'
    if args.resume:
        logger.debug(resume_train.format('resume train from previous best model'))
    else:
        logger.debug(resume_train.format('normal train'))
    pred_sub = '[Info] Predict subset: {}'
    if args.pred_subset:
        logger.debug(pred_sub.format('validate on subset by atype'))
    else:
        logger.debug(pred_sub.format('validate on all val set'))

    # load data
    logger.debug('[Info] init dataset')
    do_test = (len(cfg.TEST.SPLITS) == 1
               and cfg.TEST.SPLITS[0] in ('train2014', 'val2014'))
    trn_set = VQADataset('train', model_group_name)
    train_loader = gen_dataloader(args, trn_set, shuffle=True)
    if do_test:
        val_set = VQADataset('test', model_group_name)
        val_loader = gen_dataloader(args, val_set, shuffle=False)

    # model
    emb_size = 300
    if cfg.WORD_EMBEDDINGS:
        word_vec = merge_embeddings(cfg.WORD_EMBEDDINGS)
        aword = next(iter(word_vec))
        emb_size = len(word_vec[aword])
    logger.debug('[Info] embedding size: {}'.format(emb_size))
    logger.debug('[Info] construct model, criterion and optimizer')
    model_group = import_module('models.' + model_group_name)
    model = getattr(model_group, model_name)(num_words=trn_set.num_words,
                                             num_ans=trn_set.num_ans,
                                             emb_size=emb_size)
    logger.debug('[Info] model name: ' + args.model)
    total_param = 0
    for param in model.parameters():
        total_param += param.nelement()
    logger.debug('[Info] total parameters: {}M'.format(math.ceil(total_param / 2**20)))

    # initialize word embedding with pretrained
    if cfg.WORD_EMBEDDINGS:
        emb = model.we.weight.data.numpy()
        words = trn_set.codebook['itow']
        assert '<PAD>' not in word_vec
        fill_cnt = 0
        for i, w in enumerate(words):
            if w in word_vec:
                emb[i] = word_vec[w]
                fill_cnt += 1
        logger.debug('[debug] word embedding filling count: {}/{}'.format(
            fill_cnt, len(words)))
        model.we.weight = nn.Parameter(torch.from_numpy(emb))

    if model_group_name in ('onehot_label', 'prob_label'):
        # initialize object embedding with pretrained
        obj_emb = model.obj_net[0].weight.data.numpy()
        if model_group_name == 'prob_label':
            obj_emb = obj_emb.T
        fill_cnt = 0
        for i, line in enumerate(trn_set.objects_vocab):
            avail, vec = get_class_embedding(line, word_vec, emb_size)
            if avail:
                obj_emb[i] = vec
                fill_cnt += 1
        logger.debug('[debug] class embedding filling count: {}/{}'.format(
            fill_cnt, len(trn_set.objects_vocab)))
        if model_group_name == 'prob_label':
            obj_emb = obj_emb.T
        model.obj_net[0].weight = nn.Parameter(torch.from_numpy(obj_emb))

    model.cuda()
    if cfg.SOFT_LOSS:
        criterion = nn.BCEWithLogitsLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss().cuda()
    logger.debug('[Info] criterion name: ' + criterion.__class__.__name__)
    optimizer = torch.optim.Adam(model.parameters(), args.lr, weight_decay=args.wd)
    cudnn.benchmark = True

    # resume
    if args.resume:
        ckpt = torch.load(os.path.join(cfg.LOG_DIR.split('/')[0], args.ts,
                                       'model-best.pth.tar'))
        best_acc = ckpt['best_acc']
        start_epoch = best_epoch = ckpt['best_epoch']
        model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])
    else:
        ckpt = None
        best_acc = 0
        best_epoch = -1
        start_epoch = args.start_epoch  # -1
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_decay_freq,
    #                                             gamma=args.lr_decay_factor, last_epoch=start_epoch)

    # train
    logger.debug('[Info] start training...')
    for epoch in range(start_epoch + 1, args.epochs):
        is_best = False
        lr = adjust_learning_rate(optimizer, epoch)
        ploter.append(epoch, lr, 'lr')
        if epoch == args.ft_epoch:
            logger.debug('[Info] finetune using atype_id {} data, at epoch {}'.format(
                args.atype_id, args.ft_epoch))
            # load atypes
            RES_DIR = '/home/lyt/code/bert-as-service-test/result'
            queIds, aTypeIds = load_data(split_name='train2014', RES_DIR=RES_DIR)
            assert queIds.tolist() == trn_set.que_id.tolist()
            # select specified data for training
            sel = aTypeIds == args.atype_id
            trn_quesIds = queIds[sel].tolist()
            logger.debug('[Info] #Train set before/after selecting {}/{}'.format(
                queIds.shape[0], len(trn_quesIds)))
            trn_set = select_subset(trn_set, sel)
            # set train loader
            train_loader = gen_dataloader(args, trn_set, shuffle=True)
            # validation set
            if do_test and args.pred_subset:
                # load atypes
                queIds, aTypeIds = load_data(split_name='val2014', RES_DIR=RES_DIR)
                assert queIds.tolist() == val_set.que_id.tolist()
                # select specified data for validation
                sel = aTypeIds == args.atype_id
                val_quesIds = queIds[sel].tolist()
                logger.debug('[Info] #Val set before/after selecting {}/{}'.format(
                    queIds.shape[0], len(val_quesIds)))
                val_set = select_subset(val_set, sel)
                # set val loader
                val_loader = gen_dataloader(args, val_set, shuffle=False)

        loss = train(train_loader, model, criterion, optimizer, epoch)
        ploter.append(epoch, loss, 'train-loss')
        if do_test:
            if args.pred_subset:
                acc = validate(val_loader, model, criterion, epoch, quesIds=val_quesIds)
            else:
                acc = validate(val_loader, model, criterion, epoch)
            ploter.append(epoch, acc, 'val-acc')
            if acc > best_acc:
                is_best = True
                best_acc = acc
                best_epoch = epoch
            logger.debug('Evaluate Result:\tAcc {0}\tBest {1} ({2})'.format(
                acc, best_acc, best_epoch))

        # save checkpoint
        state = {
            'epoch': epoch,
            'best_acc': best_acc,
            'best_epoch': best_epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        if epoch % args.save_freq == 0:
            cp_fname = 'checkpoint-{:03}.pth.tar'.format(epoch)
            cp_path = os.path.join(cfg.LOG_DIR, cp_fname)
            torch.save(state, cp_path)
        if is_best:
            best_path = os.path.join(cfg.LOG_DIR, 'model-best.pth.tar')
            torch.save(state, best_path)
def main():
    args = parser.parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # select device
    torch.cuda.set_device(args.gpu_id)
    print('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # get parameters
    sys.path.insert(0, args.model_dir)
    from params import params
    assert len(params) > 1
    last_cfg = params[0][-1]
    last_cfg()
    get_data.main()
    dataset = VQADataset('test', params[0][1])
    itoa = dataset.codebook['itoa']
    vote_buff = [{} for i in range(len(dataset))]
    conf_buff = np.zeros((len(dataset), len(itoa)))
    sm_conf_buff = np.zeros((len(dataset), len(itoa)))
    l2_conf_buff = np.zeros((len(dataset), len(itoa)))
    que_ids = dataset.que_id

    for fpath, mgrp, mname, acc, cfg_func in params:
        # data
        if cfg_func != last_cfg:
            cfg_func()
            get_data.main()
            last_cfg = cfg_func
            dataset = VQADataset('test', mgrp)
            itoa = dataset.codebook['itoa']
        dataset.reload_obj(mgrp)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.bs,
                                                 shuffle=False, num_workers=2,
                                                 pin_memory=True)

        # model
        model_group = import_module('models.' + mgrp)
        model = getattr(model_group, mname)(num_words=dataset.num_words,
                                            num_ans=dataset.num_ans,
                                            emb_size=get_emb_size())
        cp_file = os.path.join(args.model_dir, fpath)
        checkpoint = torch.load(cp_file, map_location=lambda s, l: s.cuda(0))
        model.load_state_dict(checkpoint['state_dict'])
        model.cuda()
        model.eval()

        # predict
        bar = progressbar.ProgressBar()
        start = 0
        # sample: (que_id, img, que, [obj])
        for sample in bar(dataloader):
            sample_var = [Variable(d).cuda() for d in list(sample)[1:]]
            score = model(*sample_var)
            sm_score = torch.nn.functional.softmax(score)
            l2_score = torch.nn.functional.normalize(score)
            bs = score.size(0)
            conf_buff[start:start + bs] += score.data.cpu().numpy()
            sm_conf_buff[start:start + bs] += sm_score.data.cpu().numpy()
            l2_conf_buff[start:start + bs] += l2_score.data.cpu().numpy()
            _, ans_ids = torch.max(score.data, dim=1)
            for i, ans_id in enumerate(ans_ids):
                ans = itoa[ans_id]
                ans_score = acc + vote_buff[start + i].get(ans, 0)
                vote_buff[start + i][ans] = ans_score
            start += bs
model = create_model(img_dim, img_dim,
                     num_classes=num_classes,
                     max_seq_length=max_seq_length,
                     embedding_matrix=get_embeddings(),
                     )
if i == 0:
    model.summary()
print(f"Training for k index={i}/{num_k}")

dataset = VQADataset(dataset_dir, 'training', text_inputs, num_classes,
                     img_out_shape=[img_dim, img_dim],
                     validation_split=VALIDATION_SPLIT,
                     img_preprocessing_function=preprocess_input,
                     img_generator=img_data_gen,
                     k_idx=i,
                     )
dataset_valid = VQADataset(dataset_dir, 'validation', text_inputs, num_classes,
                           img_out_shape=[img_dim, img_dim],
                           validation_split=VALIDATION_SPLIT,
                           img_preprocessing_function=preprocess_input,
                           img_generator=img_data_gen,
                           k_idx=i,
                           )
train_dataset = tf.data.Dataset.from_generator(
    lambda: dataset,
    output_types=((tf.float32, tf.float32), tf.int32),
    output_shapes=(([img_dim, img_dim, 3], [max_seq_length]), []),
def main():
    global args
    args = parser.parse_args()
    args_str = json.dumps(vars(args), indent=2)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # use timestamp as log subdirectory
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    cfg.LOG_DIR = os.path.join(cfg.LOG_DIR, timestamp)
    os.mkdir(cfg.LOG_DIR)
    json.dump(cfg, open(cfg.LOG_DIR + '/config.json', 'w'), indent=2)
    model_group_name, model_name = args.model.split('/')
    shutil.copy('models/' + model_group_name + '.py', cfg.LOG_DIR)

    # init ploter
    ploter = Ploter(timestamp)

    # setting log handlers
    fh = logging.FileHandler(os.path.join(cfg.LOG_DIR, 'log'))
    fh.setLevel(logging.DEBUG)
    fhc = logging.FileHandler('current.log')
    fhc.setLevel(logging.DEBUG)
    sh = logging.StreamHandler(sys.stdout)
    sh.setLevel(logging.DEBUG)
    fmt = '[%(asctime)-15s] %(message)s'
    datefmt = '%Y-%m-%d %H:%M:%S'
    formatter = logging.Formatter(fmt, datefmt)
    fh.setFormatter(formatter)
    fhc.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(fhc)
    logger.addHandler(sh)
    logger.debug('[Info] called with: ' + args_str)
    logger.debug('[Info] timestamp: ' + timestamp)
    logger.debug('[Info] CPU random seed: {}'.format(torch.initial_seed()))
    logger.debug('[Info] GPU random seed: {}'.format(torch.cuda.initial_seed()))

    # select device
    torch.cuda.set_device(args.gpu_id)
    logger.debug('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # data
    logger.debug('[Info] init dataset')
    do_test = (len(cfg.TEST.SPLITS) == 1
               and cfg.TEST.SPLITS[0] in ('train2014', 'val2014'))
    trn_set = VQADataset('train', model_group_name)
    train_loader = torch.utils.data.DataLoader(trn_set, batch_size=args.bs,
                                               shuffle=True, num_workers=args.workers,
                                               pin_memory=True)
    if do_test:
        val_set = VQADataset('test', model_group_name)
        val_loader = torch.utils.data.DataLoader(val_set, batch_size=args.bs,
                                                 shuffle=False, num_workers=args.workers,
                                                 pin_memory=True)

    # model
    emb_size = 300
    if cfg.WORD_EMBEDDINGS:
        word_vec = merge_embeddings(cfg.WORD_EMBEDDINGS)
        aword = next(iter(word_vec))
        emb_size = len(word_vec[aword])
    logger.debug('[Info] embedding size: {}'.format(emb_size))
    logger.debug('[Info] construct model, criterion and optimizer')
    model_group = import_module('models.' + model_group_name)
    model = getattr(model_group, model_name)(num_words=trn_set.num_words,
                                             num_ans=trn_set.num_ans,
                                             emb_size=emb_size)
    logger.debug('[Info] model name: ' + args.model)
    total_param = 0
    for param in model.parameters():
        total_param += param.nelement()
    logger.debug('[Info] total parameters: {}M'.format(math.ceil(total_param / 2**20)))

    # initialize word embedding with pretrained
    if cfg.WORD_EMBEDDINGS:
        emb = model.we.weight.data.numpy()
        words = trn_set.codebook['itow']
        assert '<PAD>' not in word_vec
        fill_cnt = 0
        for i, w in enumerate(words):
            if w in word_vec:
                emb[i] = word_vec[w]
                fill_cnt += 1
        logger.debug('[debug] word embedding filling count: {}/{}'.format(
            fill_cnt, len(words)))
        model.we.weight = nn.Parameter(torch.from_numpy(emb))

    if model_group_name in ('onehot_label', 'prob_label'):
        # initialize object embedding with pretrained
        obj_emb = model.obj_net[0].weight.data.numpy()
        if model_group_name == 'prob_label':
            obj_emb = obj_emb.T
        fill_cnt = 0
        for i, line in enumerate(trn_set.objects_vocab):
            avail, vec = get_class_embedding(line, word_vec, emb_size)
            if avail:
                obj_emb[i] = vec
                fill_cnt += 1
        logger.debug('[debug] class embedding filling count: {}/{}'.format(
            fill_cnt, len(trn_set.objects_vocab)))
        if model_group_name == 'prob_label':
            obj_emb = obj_emb.T
        model.obj_net[0].weight = nn.Parameter(torch.from_numpy(obj_emb))

    model.cuda()
    if cfg.SOFT_LOSS:
        criterion = nn.BCEWithLogitsLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss().cuda()
    logger.debug('[Info] criterion name: ' + criterion.__class__.__name__)
    optimizer = torch.optim.Adam(model.parameters(), args.lr, weight_decay=args.wd)
    cudnn.benchmark = True

    # train
    logger.debug('[Info] start training...')
    best_acc = 0
    best_epoch = -1
    for epoch in range(args.start_epoch, args.epochs):
        is_best = False
        lr = adjust_learning_rate(optimizer, epoch)
        ploter.append(epoch, lr, 'lr')
        loss = train(train_loader, model, criterion, optimizer, epoch)
        ploter.append(epoch, loss, 'train-loss')
        if do_test:
            acc = validate(val_loader, model, criterion, epoch)
            ploter.append(epoch, acc, 'val-acc')
            if acc > best_acc:
                is_best = True
                best_acc = acc
                best_epoch = epoch
            logger.debug('Evaluate Result:\t'
                         'Acc {0}\t'
                         'Best {1} ({2})'.format(acc, best_acc, best_epoch))

        # save checkpoint
        cp_fname = 'checkpoint-{:03}.pth.tar'.format(epoch)
        cp_path = os.path.join(cfg.LOG_DIR, cp_fname)
        state = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict()
        }
        if epoch % args.save_freq == 0:
            torch.save(state, cp_path)
        if is_best:
            best_path = os.path.join(cfg.LOG_DIR, 'model-best.pth.tar')
            torch.save(state, best_path)
    ibatch = batch['image'].permute(0, 3, 1, 2).float().to(device)
    qbatch = token2id(batch['question'], vocab.qvocab).to(device)
    output = model(ibatch, qbatch)
    aids = output.argmax(-1).tolist()
    answers = id2answer(aids, vocab.avocab)
    for qid, answer in zip(idbatch, answers):
        results.append({'question_id': qid, 'answer': answer})

    with open('results.json', 'w') as f:
        json.dump(results, f)
    print('Finished evaluation!')


if __name__ == "__main__":
    dataset_train = VQADataset(data_dir, 'train')
    dataset_val = VQADataset(data_dir, 'val')
    dataset_test = VQADataset(data_dir, 'test')
    vocab = VQAVocab(data_dir)
    model = CoNet(len(vocab.qvocab), emb_dim, len(vocab.avocab),
                  useco=use_coatt).to(device)
    for name, param in model.named_parameters():
        print(name, param.requires_grad, param.is_cuda, param.size())
        # assert param.is_cuda
    # train(model, dataset_train, dataset_val, vocab)
def main(cfg: DictConfig) -> None:
    """
    Run the code following a given configuration
    :param cfg: configuration file retrieved from hydra framework
    """
    main_utils.init(cfg)
    logger = TrainLogger(exp_name_prefix=cfg['main']['experiment_name_prefix'],
                         logs_dir=cfg['main']['paths']['logs'])
    logger.write(OmegaConf.to_yaml(cfg))

    # Set seed for results reproduction
    main_utils.set_seed(cfg['main']['seed'])

    # Load dataset
    path_image_train = '/datashare/train2014/COCO_train2014_'
    path_question_train = '/datashare/v2_OpenEnded_mscoco_train2014_questions.json'
    train_dataset = VQADataset(path_answers=cfg['main']['paths']['train'],
                               path_image=path_image_train,
                               path_questions=path_question_train)

    path_image_val = '/datashare/val2014/COCO_val2014_'
    path_question_val = '/datashare/v2_OpenEnded_mscoco_val2014_questions.json'
    val_dataset = VQADataset(path_answers=cfg['main']['paths']['validation'],
                             path_image=path_image_val,
                             path_questions=path_question_val,
                             word_dict=train_dataset.word_dict)

    train_loader = DataLoader(train_dataset, cfg['train']['batch_size'], shuffle=True,
                              num_workers=cfg['main']['num_workers'])
    eval_loader = DataLoader(val_dataset, cfg['train']['batch_size'], shuffle=True,
                             num_workers=cfg['main']['num_workers'])

    image_dim = train_dataset.pic_size
    output_dim = 2410  # possible answers
    model = VQAModel(batch_size=cfg['train']['batch_size'],
                     word_vocab_size=train_dataset.vocab_size,
                     lstm_hidden=cfg['train']['num_hid'],
                     output_dim=output_dim,
                     dropout=cfg['train']['dropout'],
                     word_embedding_dim=cfg['train']['word_embedding_dim'],
                     question_output_dim=cfg['train']['question_output_dim'],
                     image_dim=image_dim,
                     last_hidden_fc_dim=cfg['train']['last_hidden_fc_dim'])

    if cfg['main']['parallel']:
        model = torch.nn.DataParallel(model)
    if torch.cuda.is_available():
        model = model.cuda()
    logger.write(main_utils.get_model_string(model))

    # Run model
    train_params = train_utils.get_train_params(cfg)

    # Report metrics and hyper parameters to tensorboard
    metrics = train(model, train_loader, eval_loader, train_params, logger)
    hyper_parameters = main_utils.get_flatten_dict(cfg['train'])
    logger.report_metrics_hyper_params(hyper_parameters, metrics)
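# Illustrative only (not part of the original project): the config keys that main()
# above reads, gathered into an OmegaConf object so the expected structure is visible.
# All concrete values and paths below are placeholders; the real experiment
# configuration lives in the project's hydra YAML files.
from omegaconf import OmegaConf

example_cfg = OmegaConf.create({
    'main': {
        'experiment_name_prefix': 'vqa_baseline',  # placeholder
        'seed': 42,
        'num_workers': 4,
        'parallel': False,
        'paths': {'logs': 'logs',
                  'train': 'cache/train.pkl',        # placeholder path
                  'validation': 'cache/val.pkl'},     # placeholder path
    },
    'train': {
        'batch_size': 64,
        'num_hid': 1024,
        'dropout': 0.2,
        'word_embedding_dim': 300,
        'question_output_dim': 1024,
        'last_hidden_fc_dim': 1024,
    },
})
# main(example_cfg) would then exercise the pipeline above (ignoring hydra's CLI wiring).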
def __init__(self, model_info, split, save_dir):
    assert len(model_info) > 0
    assert len(cfg.TEST.SPLITS) == 1 and cfg.TEST.SPLITS[0] == split
    model_info = sorted(model_info, key=itemgetter(0))
    self._split = split
    self.model_info = model_info
    self.save_dir = save_dir

    # load model
    self._pred_ans = []
    self._scores = []
    self._att_weights = []
    dataset = VQADataset('test', model_info[0][0])
    emb_size = get_emb_size()
    for model_group_name, model_name, cp_file in model_info:
        cache_file = cp_file + '.cache'
        if os.path.isfile(cache_file):
            print("load from cache: '{}'".format(cache_file))
            cache = pickle.load(open(cache_file, 'rb'))
            self._pred_ans.append(cache['pred_ans'])
            self._scores.append(cache['scores'])
            self._att_weights.append(cache['att_weights'])
            continue

        # dataset
        dataset.reload_obj(model_group_name)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.bs,
                                                 shuffle=False, num_workers=2,
                                                 pin_memory=True)

        # model
        model_group = import_module('models.' + model_group_name)
        model = getattr(model_group, model_name)(num_words=dataset.num_words,
                                                 num_ans=dataset.num_ans,
                                                 emb_size=emb_size)
        checkpoint = torch.load(cp_file, map_location=lambda s, l: s.cuda(0))
        model.load_state_dict(checkpoint['state_dict'])
        model.cuda()
        model.eval()

        # predicting
        itoa = dataloader.dataset.codebook['itoa']
        batch_att_weight = []
        pred_ans = []
        bar = progressbar.ProgressBar()
        print('predicting answers...')
        # sample: (que_id, img, que, [obj])
        for sample in bar(dataloader):
            # setting hook
            att_weight_buff = torch.FloatTensor(len(sample[0]), 36)

            def get_weight(self, input, output):
                att_weight_buff.copy_(output.data.view_as(att_weight_buff))

            hook = model.att_net.register_forward_hook(get_weight)
            # forward
            sample_var = [Variable(d).cuda() for d in list(sample)[1:]]
            score = model(*sample_var)
            att_weight = F.softmax(Variable(att_weight_buff)).data.numpy()
            batch_att_weight.append(att_weight)
            pred_ans.extend(format_result(sample[0], score, itoa))
            hook.remove()
        att_weights = np.vstack(batch_att_weight)

        # evaluation
        print('evaluating results...')
        if split in ('train2014', 'val2014'):
            vqa_eval = get_eval(pred_ans, split)
            scores = []
            for i in range(len(dataset)):
                qid = int(dataset[i][0])
                score = vqa_eval.evalQA.get(qid)
                scores.append(score)
        else:
            scores = None
        self._pred_ans.append(pred_ans)
        self._scores.append(scores)
        self._att_weights.append(att_weights)

        # save cache
        cache = {}
        cache['pred_ans'] = pred_ans
        cache['scores'] = scores
        cache['att_weights'] = att_weights
        pickle.dump(cache, open(cache_file, 'wb'))
    print('done.')

    # load data
    print('load raw data...')
    split_fname = '{}/raw-{}.json'.format(cfg.DATA_DIR, split)
    self._data = json.load(open(split_fname))
    print('load boxes...')
    self._boxes = self._load_box()

    # query key
    self._question = None
    self._answer = None
    self._condition = None
    # query result
    self._r_question = None
    self._r_answer = None
    self._r_condition = None
    # dirty flag
    self._d_question = True
    self._d_answer = True
    self._d_condition = True
    self.last_results = None