def make_data_loader(spec, tag=''):
    if spec is None:
        return None
    dataset = datasets.make(spec['dataset'])
    dataset = datasets.make(spec['wrapper'], args={'dataset': dataset})
    log('{} dataset: size={}'.format(tag, len(dataset)))
    for k, v in dataset[0].items():
        log('  {}: shape={}'.format(k, tuple(v.shape)))
    loader = DataLoader(dataset, batch_size=spec['batch_size'],
                        shuffle=(tag == 'train'),
                        num_workers=8, pin_memory=True)
    return loader
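# Usage sketch (illustrative only): make_data_loader expects a spec dict with
# 'dataset', 'wrapper' and 'batch_size' keys; the nested spec contents below
# are placeholders, not an actual config from this repo.
#
#   train_spec = {
#       'dataset': {'name': 'some-dataset', 'args': {}},
#       'wrapper': {'name': 'some-wrapper', 'args': {}},
#       'batch_size': 16,
#   }
#   train_loader = make_data_loader(train_spec, tag='train')  # shuffled
#   val_loader = make_data_loader(None)                       # returns None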
def main(config):
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    ##### Dataset #####

    dataset = datasets.make(config['dataset'], **config['test'])
    utils.log('meta-test set: {} (x{}), {}'.format(
        dataset[0][0].shape, len(dataset), dataset.n_classes))
    loader = DataLoader(dataset, config['test']['n_episode'],
                        collate_fn=datasets.collate_fn,
                        num_workers=1, pin_memory=True)

    ##### Model #####

    ckpt = torch.load(config['load'])
    inner_args = utils.config_inner_args(config.get('inner_args'))
    model = models.load(ckpt, load_clf=(not inner_args['reset_classifier']))

    if args.efficient:
        model.go_efficient()
    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    ##### Evaluation #####

    model.eval()
    aves_va = utils.AverageMeter()
    va_lst = []

    for epoch in range(1, config['epoch'] + 1):
        for data in tqdm(loader, leave=False):
            x_shot, x_query, y_shot, y_query = data
            x_shot, y_shot = x_shot.cuda(), y_shot.cuda()
            x_query, y_query = x_query.cuda(), y_query.cuda()

            if inner_args['reset_classifier']:
                if config.get('_parallel'):
                    model.module.reset_classifier()
                else:
                    model.reset_classifier()

            logits = model(x_shot, x_query, y_shot, inner_args,
                           meta_train=False)
            logits = logits.view(-1, config['test']['n_way'])
            labels = y_query.view(-1)

            pred = torch.argmax(logits, dim=1)
            acc = utils.compute_acc(pred, labels)
            aves_va.update(acc, 1)
            va_lst.append(acc)

        print('test epoch {}: acc={:.2f} +- {:.2f} (%)'.format(
            epoch, aves_va.item() * 100,
            utils.mean_confidence_interval(va_lst) * 100))
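# utils.mean_confidence_interval is assumed here to return the half-width of a
# 95% confidence interval over the per-episode accuracies. A minimal sketch of
# such a helper (illustrative, not necessarily the repo's exact implementation):
#
#   import numpy as np
#   import scipy.stats
#
#   def mean_confidence_interval(data, confidence=0.95):
#       a = np.asarray(data, dtype=np.float64)
#       se = scipy.stats.sem(a)                                   # standard error
#       h = se * scipy.stats.t.ppf((1 + confidence) / 2., len(a) - 1)
#       return h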
def main(config): svname = args.name if svname is None: svname = 'classifier_{}'.format(config['train_dataset']) svname += '_' + config['model_args']['encoder'] clsfr = config['model_args']['classifier'] if clsfr != 'linear-classifier': svname += '-' + clsfr if args.tag is not None: svname += '_' + args.tag save_path = os.path.join('./save', svname) utils.ensure_path(save_path) utils.set_log_path(save_path) writer = SummaryWriter(os.path.join(save_path, 'tensorboard')) yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w')) #### Dataset #### # train train_dataset = datasets.make(config['train_dataset'], **config['train_dataset_args']) augmentations = [ transforms.Compose([ transforms.RandomHorizontalFlip(p=0.5), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]), transforms.Compose([ transforms.RandomResizedCrop(size=(80, 80), scale=(0.08, 1.0), ratio=(0.75, 1.3333)), transforms.RandomHorizontalFlip(p=0.5), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]), transforms.Compose([ transforms.RandomRotation(35), transforms.RandomHorizontalFlip(p=0.5), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]), transforms.Compose([ transforms.ColorJitter(0.4, 0.4, 0.4, 0.1), transforms.RandomHorizontalFlip(p=0.5), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]), transforms.Compose([ transforms.RandomResizedCrop(size=(80, 80), scale=(0.08, 1.0), ratio=(0.75, 1.3333)), transforms.RandomRotation(35), transforms.RandomHorizontalFlip(p=0.5), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]), transforms.Compose([ transforms.RandomRotation(35), transforms.ColorJitter(0.4, 0.4, 0.4, 0.1), transforms.RandomHorizontalFlip(p=0.5), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]), transforms.Compose([ transforms.RandomResizedCrop(size=(80, 80), scale=(0.08, 1.0), ratio=(0.75, 1.3333)), transforms.ColorJitter(0.4, 0.4, 0.4, 0.1), transforms.RandomHorizontalFlip(p=0.5), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]), transforms.Compose([ transforms.RandomRotation(35), transforms.RandomResizedCrop(size=(80, 80), scale=(0.08, 1.0), ratio=(0.75, 1.3333)), transforms.ColorJitter(0.4, 0.4, 0.4, 0.1), transforms.RandomHorizontalFlip(p=0.5), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) ] train_dataset.transform = augmentations[int(config['_a'])] print(train_dataset.transform) print("_a", config['_a']) input("Continue with these augmentations?") train_loader = DataLoader(train_dataset, config['batch_size'], shuffle=True, num_workers=0, pin_memory=True) utils.log('train dataset: {} (x{}), {}'.format(train_dataset[0][0].shape, len(train_dataset), train_dataset.n_classes)) if config.get('visualize_datasets'): utils.visualize_dataset(train_dataset, 'train_dataset', writer) # val if config.get('val_dataset'): eval_val = True val_dataset = datasets.make(config['val_dataset'], **config['val_dataset_args']) val_loader = DataLoader(val_dataset, config['batch_size'], num_workers=0, pin_memory=True) utils.log('val dataset: {} (x{}), {}'.format(val_dataset[0][0].shape, len(val_dataset), val_dataset.n_classes)) if config.get('visualize_datasets'): utils.visualize_dataset(val_dataset, 'val_dataset', writer) 
else: eval_val = False # few-shot eval if config.get('fs_dataset'): ef_epoch = config.get('eval_fs_epoch') if ef_epoch is None: ef_epoch = 5 eval_fs = True fs_dataset = datasets.make(config['fs_dataset'], **config['fs_dataset_args']) utils.log('fs dataset: {} (x{}), {}'.format(fs_dataset[0][0].shape, len(fs_dataset), fs_dataset.n_classes)) if config.get('visualize_datasets'): utils.visualize_dataset(fs_dataset, 'fs_dataset', writer) n_way = 5 n_query = 15 n_shots = [1, 5] fs_loaders = [] for n_shot in n_shots: fs_sampler = CategoriesSampler(fs_dataset.label, 200, n_way, n_shot + n_query, ep_per_batch=4) fs_loader = DataLoader(fs_dataset, batch_sampler=fs_sampler, num_workers=0, pin_memory=True) fs_loaders.append(fs_loader) else: eval_fs = False ######## #### Model and Optimizer #### if config.get('load'): model_sv = torch.load(config['load']) model = models.load(model_sv) else: model = models.make(config['model'], **config['model_args']) if eval_fs: fs_model = models.make('meta-baseline', encoder=None) fs_model.encoder = model.encoder if config.get('_parallel'): model = nn.DataParallel(model) if eval_fs: fs_model = nn.DataParallel(fs_model) utils.log('num params: {}'.format(utils.compute_n_params(model))) optimizer, lr_scheduler = utils.make_optimizer(model.parameters(), config['optimizer'], **config['optimizer_args']) ######## max_epoch = config['max_epoch'] save_epoch = config.get('save_epoch') max_va = 0. timer_used = utils.Timer() timer_epoch = utils.Timer() for epoch in range(1, max_epoch + 1 + 1): if epoch == max_epoch + 1: if not config.get('epoch_ex'): break train_dataset.transform = train_dataset.default_transform print(train_dataset.transform) train_loader = DataLoader(train_dataset, config['batch_size'], shuffle=True, num_workers=0, pin_memory=True) timer_epoch.s() aves_keys = ['tl', 'ta', 'vl', 'va'] if eval_fs: for n_shot in n_shots: aves_keys += ['fsa-' + str(n_shot)] aves = {k: utils.Averager() for k in aves_keys} # train model.train() writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch) for data, label in tqdm(train_loader, desc='train', leave=False): # for data, label in train_loader: data, label = data.cuda(), label.cuda() logits = model(data) loss = F.cross_entropy(logits, label) acc = utils.compute_acc(logits, label) optimizer.zero_grad() loss.backward() optimizer.step() aves['tl'].add(loss.item()) aves['ta'].add(acc) logits = None loss = None # eval if eval_val: model.eval() for data, label in tqdm(val_loader, desc='val', leave=False): data, label = data.cuda(), label.cuda() with torch.no_grad(): logits = model(data) loss = F.cross_entropy(logits, label) acc = utils.compute_acc(logits, label) aves['vl'].add(loss.item()) aves['va'].add(acc) if eval_fs and (epoch % ef_epoch == 0 or epoch == max_epoch + 1): fs_model.eval() for i, n_shot in enumerate(n_shots): np.random.seed(0) for data, _ in tqdm(fs_loaders[i], desc='fs-' + str(n_shot), leave=False): x_shot, x_query = fs.split_shot_query(data.cuda(), n_way, n_shot, n_query, ep_per_batch=4) label = fs.make_nk_label(n_way, n_query, ep_per_batch=4).cuda() with torch.no_grad(): logits = fs_model(x_shot, x_query).view(-1, n_way) acc = utils.compute_acc(logits, label) aves['fsa-' + str(n_shot)].add(acc) # post if lr_scheduler is not None: lr_scheduler.step() for k, v in aves.items(): aves[k] = v.item() t_epoch = utils.time_str(timer_epoch.t()) t_used = utils.time_str(timer_used.t()) t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch) if epoch <= max_epoch: epoch_str = str(epoch) else: epoch_str = 'ex' 
        log_str = 'epoch {}, train {:.4f}|{:.4f}'.format(
            epoch_str, aves['tl'], aves['ta'])
        writer.add_scalars('loss', {'train': aves['tl']}, epoch)
        writer.add_scalars('acc', {'train': aves['ta']}, epoch)

        if eval_val:
            log_str += ', val {:.4f}|{:.4f}'.format(aves['vl'], aves['va'])
            writer.add_scalars('loss', {'val': aves['vl']}, epoch)
            writer.add_scalars('acc', {'val': aves['va']}, epoch)

        if eval_fs and (epoch % ef_epoch == 0 or epoch == max_epoch + 1):
            log_str += ', fs'
            for n_shot in n_shots:
                key = 'fsa-' + str(n_shot)
                log_str += ' {}: {:.4f}'.format(n_shot, aves[key])
                writer.add_scalars('acc', {key: aves[key]}, epoch)

        if epoch <= max_epoch:
            log_str += ', {} {}/{}'.format(t_epoch, t_used, t_estimate)
        else:
            log_str += ', {}'.format(t_epoch)
        utils.log(log_str)

        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,
            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),
            'training': training,
        }
        if epoch <= max_epoch:
            torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))

            if (save_epoch is not None) and epoch % save_epoch == 0:
                torch.save(save_obj, os.path.join(
                    save_path, 'epoch-{}.pth'.format(epoch)))

            if aves['va'] > max_va:
                max_va = aves['va']
                torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))
        else:
            torch.save(save_obj, os.path.join(save_path, 'epoch-ex.pth'))

        writer.flush()
def main(config): # Environment setup save_dir = config['save_dir'] utils.ensure_path(save_dir) with open(osp.join(save_dir, 'config.yaml'), 'w') as f: yaml.dump(config, f, sort_keys=False) global log, writer logger = set_logger(osp.join(save_dir, 'log.txt')) log = logger.info writer = SummaryWriter(osp.join(save_dir, 'tensorboard')) os.environ['WANDB_NAME'] = config['exp_name'] os.environ['WANDB_DIR'] = config['save_dir'] if not config.get('wandb_upload', False): os.environ['WANDB_MODE'] = 'dryrun' t = config['wandb'] os.environ['WANDB_API_KEY'] = t['api_key'] wandb.init(project=t['project'], entity=t['entity'], config=config) log('logging init done.') log(f'wandb id: {wandb.run.id}') # Dataset, model and optimizer train_dataset = datasets.make((config['train_dataset'])) test_dataset = datasets.make((config['test_dataset'])) model = models.make(config['model'], args=None).cuda() log(f'model #params: {utils.compute_num_params(model)}') n_gpus = len(os.environ['CUDA_VISIBLE_DEVICES'].split(',')) if n_gpus > 1: model = nn.DataParallel(model) optimizer = utils.make_optimizer(model.parameters(), config['optimizer']) train_loader = DataLoader(train_dataset, config['batch_size'], shuffle=True, num_workers=8, pin_memory=True) test_loader = DataLoader(test_dataset, config['batch_size'], num_workers=8, pin_memory=True) # Ready for training max_epoch = config['max_epoch'] n_milestones = config.get('n_milestones', 1) milestone_epoch = max_epoch // n_milestones min_test_loss = 1e18 sample_batch_train = sample_data_batch(train_dataset).cuda() sample_batch_test = sample_data_batch(test_dataset).cuda() epoch_timer = utils.EpochTimer(max_epoch) for epoch in range(1, max_epoch + 1): log_text = f'epoch {epoch}' # Train model.train() adjust_lr(optimizer, epoch, max_epoch, config) log_temp_scalar('lr', optimizer.param_groups[0]['lr'], epoch) ave_scalars = {k: utils.Averager() for k in ['loss']} pbar = tqdm(train_loader, desc='train', leave=False) for data in pbar: data = data.cuda() t = train_step(model, data, data, optimizer) for k, v in t.items(): ave_scalars[k].add(v, len(data)) pbar.set_description(desc=f"train loss:{t['loss']:.4f}") log_text += ', train:' for k, v in ave_scalars.items(): v = v.item() log_text += f' {k}={v:.4f}' log_temp_scalar('train/' + k, v, epoch) # Test model.eval() ave_scalars = {k: utils.Averager() for k in ['loss']} pbar = tqdm(test_loader, desc='test', leave=False) for data in pbar: data = data.cuda() t = eval_step(model, data, data) for k, v in t.items(): ave_scalars[k].add(v, len(data)) pbar.set_description(desc=f"test loss:{t['loss']:.4f}") log_text += ', test:' for k, v in ave_scalars.items(): v = v.item() log_text += f' {k}={v:.4f}' log_temp_scalar('test/' + k, v, epoch) test_loss = ave_scalars['loss'].item() if epoch % milestone_epoch == 0: with torch.no_grad(): pred = model(sample_batch_train).clamp(0, 1) video_batch = torch.cat([sample_batch_train, pred], dim=0) log_temp_videos('train/videos', video_batch, epoch) img_batch = video_batch[:, :, 3, :, :] log_temp_images('train/images', img_batch, epoch) pred = model(sample_batch_test).clamp(0, 1) video_batch = torch.cat([sample_batch_test, pred], dim=0) log_temp_videos('test/videos', video_batch, epoch) img_batch = video_batch[:, :, 3, :, :] log_temp_images('test/images', img_batch, epoch) # Summary and save log_text += ', {} {}/{}'.format(*epoch_timer.step()) log(log_text) model_ = model.module if n_gpus > 1 else model model_spec = config['model'] model_spec['sd'] = model_.state_dict() optimizer_spec = config['optimizer'] 
        optimizer_spec['sd'] = optimizer.state_dict()
        pth_file = {
            'model': model_spec,
            'optimizer': optimizer_spec,
            'epoch': epoch,
        }

        if test_loss < min_test_loss:
            min_test_loss = test_loss
            wandb.run.summary['min_test_loss'] = min_test_loss
            torch.save(pth_file, osp.join(save_dir, 'min-test-loss.pth'))
        torch.save(pth_file, osp.join(save_dir, 'epoch-last.pth'))

        writer.flush()
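# The checkpoint written above stores the model and optimizer specs with their
# state dicts under 'sd', plus the epoch. A saved file can later be restored
# following the same pattern the test script further below uses, e.g.
# (illustrative sketch, paths are placeholders):
#
#   ckpt = torch.load(osp.join(save_dir, 'epoch-last.pth'))
#   model = models.make(ckpt['model'], load_sd=True).cuda()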
def main(config): svname = args.name if svname is None: svname = 'meta_{}-{}shot'.format(config['train_dataset'], config['n_shot']) svname += '_' + config['model'] if config['model_args'].get('encoder'): svname += '-' + config['model_args']['encoder'] if config['model_args'].get('prog_synthesis'): svname += '-' + config['model_args']['prog_synthesis'] svname += '-seed' + str(args.seed) if args.tag is not None: svname += '_' + args.tag save_path = os.path.join(args.save_dir, svname) utils.ensure_path(save_path, remove=False) utils.set_log_path(save_path) writer = SummaryWriter(os.path.join(save_path, 'tensorboard')) yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w')) logger = utils.Logger(file_name=os.path.join(save_path, "log_sdout.txt"), file_mode="a+", should_flush=True) #### Dataset #### n_way, n_shot = config['n_way'], config['n_shot'] n_query = config['n_query'] if config.get('n_train_way') is not None: n_train_way = config['n_train_way'] else: n_train_way = n_way if config.get('n_train_shot') is not None: n_train_shot = config['n_train_shot'] else: n_train_shot = n_shot if config.get('ep_per_batch') is not None: ep_per_batch = config['ep_per_batch'] else: ep_per_batch = 1 random_state = np.random.RandomState(args.seed) print('seed:', args.seed) # train train_dataset = datasets.make(config['train_dataset'], **config['train_dataset_args']) utils.log('train dataset: {} (x{})'.format(train_dataset[0][0].shape, len(train_dataset))) if config.get('visualize_datasets'): utils.visualize_dataset(train_dataset, 'train_dataset', writer) train_sampler = BongardSampler(train_dataset.n_tasks, config['train_batches'], ep_per_batch, random_state.randint(2**31)) train_loader = DataLoader(train_dataset, batch_sampler=train_sampler, num_workers=8, pin_memory=True) # tvals tval_loaders = {} tval_name_ntasks_dict = { 'tval': 2000, 'tval_ff': 600, 'tval_bd': 480, 'tval_hd_comb': 400, 'tval_hd_novel': 320 } # numbers depend on dataset for tval_type in tval_name_ntasks_dict.keys(): if config.get('{}_dataset'.format(tval_type)): tval_dataset = datasets.make( config['{}_dataset'.format(tval_type)], **config['{}_dataset_args'.format(tval_type)]) utils.log('{} dataset: {} (x{})'.format(tval_type, tval_dataset[0][0].shape, len(tval_dataset))) if config.get('visualize_datasets'): utils.visualize_dataset(tval_dataset, 'tval_ff_dataset', writer) tval_sampler = BongardSampler( tval_dataset.n_tasks, n_batch=tval_name_ntasks_dict[tval_type] // ep_per_batch, ep_per_batch=ep_per_batch, seed=random_state.randint(2**31)) tval_loader = DataLoader(tval_dataset, batch_sampler=tval_sampler, num_workers=8, pin_memory=True) tval_loaders.update({tval_type: tval_loader}) else: tval_loaders.update({tval_type: None}) # val val_dataset = datasets.make(config['val_dataset'], **config['val_dataset_args']) utils.log('val dataset: {} (x{})'.format(val_dataset[0][0].shape, len(val_dataset))) if config.get('visualize_datasets'): utils.visualize_dataset(val_dataset, 'val_dataset', writer) val_sampler = BongardSampler(val_dataset.n_tasks, n_batch=900 // ep_per_batch, ep_per_batch=ep_per_batch, seed=random_state.randint(2**31)) val_loader = DataLoader(val_dataset, batch_sampler=val_sampler, num_workers=8, pin_memory=True) ######## #### Model and optimizer #### if config.get('load'): print('loading pretrained model: ', config['load']) model = models.load(torch.load(config['load'])) else: model = models.make(config['model'], **config['model_args']) if config.get('load_encoder'): print('loading pretrained encoder: ', 
config['load_encoder']) encoder = models.load(torch.load(config['load_encoder'])).encoder model.encoder.load_state_dict(encoder.state_dict()) if config.get('load_prog_synthesis'): print('loading pretrained program synthesis model: ', config['load_prog_synthesis']) prog_synthesis = models.load( torch.load(config['load_prog_synthesis'])) model.prog_synthesis.load_state_dict(prog_synthesis.state_dict()) if config.get('_parallel'): model = nn.DataParallel(model) utils.log('num params: {}'.format(utils.compute_n_params(model))) optimizer, lr_scheduler = utils.make_optimizer(model.parameters(), config['optimizer'], **config['optimizer_args']) ######## max_epoch = config['max_epoch'] save_epoch = config.get('save_epoch') max_va = 0. timer_used = utils.Timer() timer_epoch = utils.Timer() aves_keys = ['tl', 'ta', 'vl', 'va'] tval_tuple_lst = [] for k, v in tval_loaders.items(): if v is not None: loss_key = 'tvl' + k.split('tval')[-1] acc_key = ' tva' + k.split('tval')[-1] aves_keys.append(loss_key) aves_keys.append(acc_key) tval_tuple_lst.append((k, v, loss_key, acc_key)) trlog = dict() for k in aves_keys: trlog[k] = [] for epoch in range(1, max_epoch + 1): timer_epoch.s() aves = {k: utils.Averager() for k in aves_keys} # train model.train() if config.get('freeze_bn'): utils.freeze_bn(model) writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch) for data, label in tqdm(train_loader, desc='train', leave=False): x_shot, x_query = fs.split_shot_query(data.cuda(), n_train_way, n_train_shot, n_query, ep_per_batch=ep_per_batch) label_query = fs.make_nk_label(n_train_way, n_query, ep_per_batch=ep_per_batch).cuda() if config['model'] == 'snail': # only use one selected label_query query_dix = random_state.randint(n_train_way * n_query) label_query = label_query.view(ep_per_batch, -1)[:, query_dix] x_query = x_query[:, query_dix:query_dix + 1] if config['model'] == 'maml': # need grad in maml model.zero_grad() logits = model(x_shot, x_query).view(-1, n_train_way) loss = F.cross_entropy(logits, label_query) acc = utils.compute_acc(logits, label_query) optimizer.zero_grad() loss.backward() optimizer.step() aves['tl'].add(loss.item()) aves['ta'].add(acc) logits = None loss = None # eval model.eval() for name, loader, name_l, name_a in [('val', val_loader, 'vl', 'va') ] + tval_tuple_lst: if config.get('{}_dataset'.format(name)) is None: continue np.random.seed(0) for data, _ in tqdm(loader, desc=name, leave=False): x_shot, x_query = fs.split_shot_query( data.cuda(), n_way, n_shot, n_query, ep_per_batch=ep_per_batch) label_query = fs.make_nk_label( n_way, n_query, ep_per_batch=ep_per_batch).cuda() if config[ 'model'] == 'snail': # only use one randomly selected label_query query_dix = random_state.randint(n_train_way) label_query = label_query.view(ep_per_batch, -1)[:, query_dix] x_query = x_query[:, query_dix:query_dix + 1] if config['model'] == 'maml': # need grad in maml model.zero_grad() logits = model(x_shot, x_query, eval=True).view(-1, n_way) loss = F.cross_entropy(logits, label_query) acc = utils.compute_acc(logits, label_query) else: with torch.no_grad(): logits = model(x_shot, x_query, eval=True).view(-1, n_way) loss = F.cross_entropy(logits, label_query) acc = utils.compute_acc(logits, label_query) aves[name_l].add(loss.item()) aves[name_a].add(acc) # post if lr_scheduler is not None: lr_scheduler.step() for k, v in aves.items(): aves[k] = v.item() trlog[k].append(aves[k]) t_epoch = utils.time_str(timer_epoch.t()) t_used = utils.time_str(timer_used.t()) t_estimate = 
utils.time_str(timer_used.t() / epoch * max_epoch) log_str = 'epoch {}, train {:.4f}|{:.4f}, val {:.4f}|{:.4f}'.format( epoch, aves['tl'], aves['ta'], aves['vl'], aves['va']) for tval_name, _, loss_key, acc_key in tval_tuple_lst: log_str += ', {} {:.4f}|{:.4f}'.format(tval_name, aves[loss_key], aves[acc_key]) writer.add_scalars('loss', {tval_name: aves[loss_key]}, epoch) writer.add_scalars('acc', {tval_name: aves[acc_key]}, epoch) log_str += ', {} {}/{}'.format(t_epoch, t_used, t_estimate) utils.log(log_str) writer.add_scalars('loss', { 'train': aves['tl'], 'val': aves['vl'], }, epoch) writer.add_scalars('acc', { 'train': aves['ta'], 'val': aves['va'], }, epoch) if config.get('_parallel'): model_ = model.module else: model_ = model training = { 'epoch': epoch, 'optimizer': config['optimizer'], 'optimizer_args': config['optimizer_args'], 'optimizer_sd': optimizer.state_dict(), } save_obj = { 'file': __file__, 'config': config, 'model': config['model'], 'model_args': config['model_args'], 'model_sd': model_.state_dict(), 'training': training, } torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth')) torch.save(trlog, os.path.join(save_path, 'trlog.pth')) if (save_epoch is not None) and epoch % save_epoch == 0: torch.save(save_obj, os.path.join(save_path, 'epoch-{}.pth'.format(epoch))) if aves['va'] > max_va: max_va = aves['va'] torch.save(save_obj, os.path.join(save_path, 'max-va.pth')) writer.flush() print('finished training!') logger.close()
def main(config): svname = args.name if svname is None: svname = 'meta_{}-{}shot'.format( config['train_dataset'], config['n_shot']) svname += '_' + config['model'] + '-' + config['model_args']['encoder'] if args.tag is not None: svname += '_' + args.tag save_path = os.path.join('./save', svname) utils.ensure_path(save_path) utils.set_log_path(save_path) writer = SummaryWriter(os.path.join(save_path, 'tensorboard')) yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w')) #### Dataset #### n_way, n_shot = config['n_way'], config['n_shot'] n_query = config['n_query'] if config.get('n_train_way') is not None: n_train_way = config['n_train_way'] else: n_train_way = n_way if config.get('n_train_shot') is not None: n_train_shot = config['n_train_shot'] else: n_train_shot = n_shot if config.get('ep_per_batch') is not None: ep_per_batch = config['ep_per_batch'] else: ep_per_batch = 1 # train train_dataset = datasets.make(config['train_dataset'], **config['train_dataset_args']) utils.log('train dataset: {} (x{}), {}'.format( train_dataset[0][0].shape, len(train_dataset), train_dataset.n_classes)) if config.get('visualize_datasets'): utils.visualize_dataset(train_dataset, 'train_dataset', writer) train_sampler = CategoriesSampler( train_dataset.label, config['train_batches'], n_train_way, n_train_shot + n_query, ep_per_batch=ep_per_batch) train_loader = DataLoader(train_dataset, batch_sampler=train_sampler, num_workers=8, pin_memory=True) # tval if config.get('tval_dataset'): tval_dataset = datasets.make(config['tval_dataset'], **config['tval_dataset_args']) utils.log('tval dataset: {} (x{}), {}'.format( tval_dataset[0][0].shape, len(tval_dataset), tval_dataset.n_classes)) if config.get('visualize_datasets'): utils.visualize_dataset(tval_dataset, 'tval_dataset', writer) tval_sampler = CategoriesSampler( tval_dataset.label, 200, n_way, n_shot + n_query, ep_per_batch=4) tval_loader = DataLoader(tval_dataset, batch_sampler=tval_sampler, num_workers=8, pin_memory=True) else: tval_loader = None # val val_dataset = datasets.make(config['val_dataset'], **config['val_dataset_args']) utils.log('val dataset: {} (x{}), {}'.format( val_dataset[0][0].shape, len(val_dataset), val_dataset.n_classes)) if config.get('visualize_datasets'): utils.visualize_dataset(val_dataset, 'val_dataset', writer) val_sampler = CategoriesSampler( val_dataset.label, 200, n_way, n_shot + n_query, ep_per_batch=4) val_loader = DataLoader(val_dataset, batch_sampler=val_sampler, num_workers=8, pin_memory=True) ######## #### Model and optimizer #### if config.get('load'): model_sv = torch.load(config['load']) model = models.load(model_sv) else: model = models.make(config['model'], **config['model_args']) if config.get('load_encoder'): encoder = models.load(torch.load(config['load_encoder'])).encoder model.encoder.load_state_dict(encoder.state_dict()) if config.get('_parallel'): model = nn.DataParallel(model) utils.log('num params: {}'.format(utils.compute_n_params(model))) optimizer, lr_scheduler = utils.make_optimizer( model.parameters(), config['optimizer'], **config['optimizer_args']) ######## max_epoch = config['max_epoch'] save_epoch = config.get('save_epoch') max_va = 0. 
timer_used = utils.Timer() timer_epoch = utils.Timer() aves_keys = ['tl', 'ta', 'tvl', 'tva', 'vl', 'va'] trlog = dict() for k in aves_keys: trlog[k] = [] for epoch in range(1, max_epoch + 1): timer_epoch.s() aves = {k: utils.Averager() for k in aves_keys} # train model.train() if config.get('freeze_bn'): utils.freeze_bn(model) writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch) np.random.seed(epoch) for data, _ in tqdm(train_loader, desc='train', leave=False): x_shot, x_query = fs.split_shot_query( data.cuda(), n_train_way, n_train_shot, n_query, ep_per_batch=ep_per_batch) label = fs.make_nk_label(n_train_way, n_query, ep_per_batch=ep_per_batch).cuda() logits = model(x_shot, x_query).view(-1, n_train_way) loss = F.cross_entropy(logits, label) acc = utils.compute_acc(logits, label) optimizer.zero_grad() loss.backward() optimizer.step() aves['tl'].add(loss.item()) aves['ta'].add(acc) logits = None; loss = None # eval model.eval() for name, loader, name_l, name_a in [ ('tval', tval_loader, 'tvl', 'tva'), ('val', val_loader, 'vl', 'va')]: if (config.get('tval_dataset') is None) and name == 'tval': continue np.random.seed(0) for data, _ in tqdm(loader, desc=name, leave=False): x_shot, x_query = fs.split_shot_query( data.cuda(), n_way, n_shot, n_query, ep_per_batch=4) label = fs.make_nk_label(n_way, n_query, ep_per_batch=4).cuda() with torch.no_grad(): logits = model(x_shot, x_query).view(-1, n_way) loss = F.cross_entropy(logits, label) acc = utils.compute_acc(logits, label) aves[name_l].add(loss.item()) aves[name_a].add(acc) _sig = int(_[-1]) # post if lr_scheduler is not None: lr_scheduler.step() for k, v in aves.items(): aves[k] = v.item() trlog[k].append(aves[k]) t_epoch = utils.time_str(timer_epoch.t()) t_used = utils.time_str(timer_used.t()) t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch) utils.log('epoch {}, train {:.4f}|{:.4f}, tval {:.4f}|{:.4f}, ' 'val {:.4f}|{:.4f}, {} {}/{} (@{})'.format( epoch, aves['tl'], aves['ta'], aves['tvl'], aves['tva'], aves['vl'], aves['va'], t_epoch, t_used, t_estimate, _sig)) writer.add_scalars('loss', { 'train': aves['tl'], 'tval': aves['tvl'], 'val': aves['vl'], }, epoch) writer.add_scalars('acc', { 'train': aves['ta'], 'tval': aves['tva'], 'val': aves['va'], }, epoch) if config.get('_parallel'): model_ = model.module else: model_ = model training = { 'epoch': epoch, 'optimizer': config['optimizer'], 'optimizer_args': config['optimizer_args'], 'optimizer_sd': optimizer.state_dict(), } save_obj = { 'file': __file__, 'config': config, 'model': config['model'], 'model_args': config['model_args'], 'model_sd': model_.state_dict(), 'training': training, } torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth')) torch.save(trlog, os.path.join(save_path, 'trlog.pth')) if (save_epoch is not None) and epoch % save_epoch == 0: torch.save(save_obj, os.path.join(save_path, 'epoch-{}.pth'.format(epoch))) if aves['va'] > max_va: max_va = aves['va'] torch.save(save_obj, os.path.join(save_path, 'max-va.pth')) writer.flush()
def main(config):
    # dataset
    dataset = datasets.make(config['dataset'], **config['dataset_args'])
    utils.log('dataset: {} (x{}), {}'.format(
        dataset[0][0].shape, len(dataset), dataset.n_classes))

    if not args.sauc:
        n_way = 5
    else:
        n_way = 2
    n_shot, n_unlabel, n_query = args.shot, 30, 15
    n_batch = 200
    ep_per_batch = 4
    batch_sampler = CategoriesSampler_Semi(
        dataset.label, n_batch, n_way, n_shot, n_unlabel, n_query,
        ep_per_batch=ep_per_batch)
    loader = DataLoader(dataset, batch_sampler=batch_sampler,
                        num_workers=8, pin_memory=True)

    # model
    if config.get('load') is None:
        model = models.make('meta-baseline', encoder=None)
    else:
        model = models.load(torch.load(config['load']))

    if config.get('load_encoder') is not None:
        encoder = models.load(torch.load(config['load_encoder'])).encoder
        model.encoder = encoder

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    model.eval()
    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    # testing
    aves_keys = ['vl', 'va']
    aves = {k: utils.Averager() for k in aves_keys}

    test_epochs = args.test_epochs
    np.random.seed(0)
    va_lst = []
    for epoch in range(1, test_epochs + 1):
        for data, _ in tqdm(loader, leave=False):
            x_shot, x_unlabel, x_query = fs.split_shot_query_semi(
                data.cuda(), n_way, n_shot, n_unlabel, n_query,
                ep_per_batch=ep_per_batch)

            with torch.no_grad():
                if not args.sauc:
                    logits = model(x_shot, x_unlabel, x_query).view(-1, n_way)
                    label = fs.make_nk_label(n_way, n_query,
                                             ep_per_batch=ep_per_batch).cuda()
                    loss = F.cross_entropy(logits, label)
                    acc = utils.compute_acc(logits, label)

                    aves['vl'].add(loss.item(), len(data))
                    aves['va'].add(acc, len(data))
                    va_lst.append(acc)
                else:
                    x_shot = x_shot[:, 0, :, :, :, :].contiguous()
                    shot_shape = x_shot.shape[:-3]
                    img_shape = x_shot.shape[-3:]
                    bs = shot_shape[0]
                    p = model.encoder(x_shot.view(-1, *img_shape)).reshape(
                        *shot_shape, -1).mean(dim=1, keepdim=True)
                    q = model.encoder(x_query.view(-1, *img_shape)).view(
                        bs, -1, p.shape[-1])
                    p = F.normalize(p, dim=-1)
                    q = F.normalize(q, dim=-1)
                    s = torch.bmm(q, p.transpose(2, 1)).view(bs, -1).cpu()
                    for i in range(bs):
                        k = s.shape[1] // 2
                        y_true = [1] * k + [0] * k
                        acc = roc_auc_score(y_true, s[i])
                        aves['va'].add(acc, len(data))
                        va_lst.append(acc)

        print('test epoch {}: acc={:.2f} +- {:.2f} (%), loss={:.4f} (@{})'.format(
            epoch, aves['va'].item() * 100,
            mean_confidence_interval(va_lst) * 100,
            aves['vl'].item(), _[-1]))
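# Note on the args.sauc branch above: for each episode it computes cosine
# similarities between the normalized query embeddings and the normalized mean
# embedding of the shots of class 0, treats the first half of the queries as
# positives (y_true = 1) and the second half as negatives, and reports ROC-AUC
# in place of cross-entropy-based classification accuracy.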
def main(config, args): random.seed(0) np.random.seed(0) torch.manual_seed(0) torch.cuda.manual_seed(0) # torch.backends.cudnn.deterministic = True # torch.backends.cudnn.benchmark = False wandb_auth() try: __IPYTHON__ wandb.init(project="NAS", group=f"maml") except: wandb.init(project="NAS", group=f"maml", config=config) ckpt_name = args.name if ckpt_name is None: ckpt_name = config['encoder'] ckpt_name += '_' + config['dataset'].replace('meta-', '') ckpt_name += '_{}_way_{}_shot'.format(config['train']['n_way'], config['train']['n_shot']) if args.tag is not None: ckpt_name += '_' + args.tag ckpt_path = os.path.join('./save', ckpt_name) utils.ensure_path(ckpt_path) utils.set_log_path(ckpt_path) writer = SummaryWriter(os.path.join(ckpt_path, 'tensorboard')) yaml.dump(config, open(os.path.join(ckpt_path, 'config.yaml'), 'w')) ##### Dataset ##### # meta-train train_set = datasets.make(config['dataset'], **config['train']) utils.log('meta-train set: {} (x{}), {}'.format(train_set[0][0].shape, len(train_set), train_set.n_classes)) # meta-val eval_val = False if config.get('val'): eval_val = True val_set = datasets.make(config['dataset'], **config['val']) utils.log('meta-val set: {} (x{}), {}'.format(val_set[0][0].shape, len(val_set), val_set.n_classes)) val_loader = DataLoader(val_set, config['val']['n_episode'], collate_fn=datasets.collate_fn, num_workers=1, pin_memory=True) # if args.split == "traintrain" and config.get('val'): # TODO I dont think this is what they meant by train-train :D # train_set = torch.utils.data.ConcatDataset([train_set, val_set]) train_loader = DataLoader(train_set, config['train']['n_episode'], collate_fn=datasets.collate_fn, num_workers=1, pin_memory=True) ##### Model and Optimizer ##### inner_args = utils.config_inner_args(config.get('inner_args')) if config.get('load') or (args.load is True and os.path.exists(ckpt_path + '/epoch-last.pth')): if config.get('load') is None: config['load'] = ckpt_path + '/epoch-last.pth' ckpt = torch.load(config['load']) config['encoder'] = ckpt['encoder'] config['encoder_args'] = ckpt['encoder_args'] config['classifier'] = ckpt['classifier'] config['classifier_args'] = ckpt['classifier_args'] model = models.load(ckpt, load_clf=(not inner_args['reset_classifier'])) optimizer, lr_scheduler = optimizers.load(ckpt, model.parameters()) start_epoch = ckpt['training']['epoch'] + 1 max_va = ckpt['training']['max_va'] else: config['encoder_args'] = config.get('encoder_args') or dict() config['classifier_args'] = config.get('classifier_args') or dict() config['encoder_args']['bn_args']['n_episode'] = config['train'][ 'n_episode'] config['classifier_args']['n_way'] = config['train']['n_way'] model = models.make(config['encoder'], config['encoder_args'], config['classifier'], config['classifier_args']) optimizer, lr_scheduler = optimizers.make(config['optimizer'], model.parameters(), **config['optimizer_args']) start_epoch = 1 max_va = 0. 
if args.efficient: model.go_efficient() if config.get('_parallel'): model = nn.DataParallel(model) utils.log('num params: {}'.format(utils.compute_n_params(model))) timer_elapsed, timer_epoch = utils.Timer(), utils.Timer() ##### Training and evaluation ##### # 'tl': meta-train loss # 'ta': meta-train accuracy # 'vl': meta-val loss # 'va': meta-val accuracy aves_keys = ['tl', 'ta', 'vl', 'va'] trlog = dict() for k in aves_keys: trlog[k] = [] for epoch in tqdm(range(start_epoch, config['epoch'] + 1), desc="Iterating over epochs"): timer_epoch.start() aves = {k: utils.AverageMeter() for k in aves_keys} # meta-train model.train() writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch) np.random.seed(epoch) all_sotls = 0 all_sovls = 0 for data_idx, data in enumerate( tqdm(train_loader, desc='meta-train', leave=False)): x_shot, x_query, y_shot, y_query = data x_shot, y_shot = x_shot.cuda(), y_shot.cuda() x_query, y_query = x_query.cuda(), y_query.cuda() if inner_args['reset_classifier']: if config.get('_parallel'): model.module.reset_classifier() else: model.reset_classifier() if args.split == "traintrain": x_query = x_shot y_query = y_shot logits, sotl, all_losses = model(x_shot, x_query, y_shot, inner_args, meta_train=True) # print("HAHHA", data_idx, all_losses) # sotl = sum([l[-1] for l in all_losses]) # for l in all_losses[:-1]: # for i in range(len(l)-1): # l[i] = l[i].detach() logits = logits.flatten(0, 1) labels = y_query.flatten() all_sotls += sotl pred = torch.argmax(logits, dim=-1) acc = utils.compute_acc(pred, labels) loss = F.cross_entropy(logits, labels) # all_sovls += loss # TODO I think this causes blowup because it creates new tensors that never get discarded and it maintains the computational graph after? if args.split == "trainval" or ( args.split == "sovl" and not data_idx % args.sotl_freq == 0): aves['tl'].update(loss.item(), 1) aves['ta'].update(acc, 1) optimizer.zero_grad() loss.backward() for param in optimizer.param_groups[0]['params']: nn.utils.clip_grad_value_(param, 10) optimizer.step() elif args.split == "traintrain": aves['tl'].update(loss.item(), 1) aves['ta'].update(acc, 1) # sotl = sum(sotl) + loss optimizer.zero_grad() # sotl.backward() loss.backward() for param in optimizer.param_groups[0]['params']: nn.utils.clip_grad_value_(param, 10) optimizer.step() elif args.split == "sotl" and data_idx % args.sotl_freq == 0: # TODO doesnt work whatsoever aves['tl'].update(loss.item(), 1) aves['ta'].update(acc, 1) optimizer.zero_grad() all_sotls.backward() for param in optimizer.param_groups[0]['params']: nn.utils.clip_grad_value_(param, 10) optimizer.step() all_sotls = 0 # detach elif args.split == "sovl" and data_idx % args.sotl_freq == 0: # TODO doesnt work whatsoever aves['tl'].update(loss.item(), 1) aves['ta'].update(acc, 1) optimizer.zero_grad() all_sovls.backward() for param in optimizer.param_groups[0]['params']: nn.utils.clip_grad_value_(param, 10) optimizer.step() all_sovls = 0 # detach # meta-val if eval_val: model.eval() np.random.seed(0) for data in tqdm(val_loader, desc='meta-val', leave=False): x_shot, x_query, y_shot, y_query = data x_shot, y_shot = x_shot.cuda(), y_shot.cuda() x_query, y_query = x_query.cuda(), y_query.cuda() if inner_args['reset_classifier']: if config.get('_parallel'): model.module.reset_classifier() else: model.reset_classifier() logits, sotl, all_losses = model(x_shot, x_query, y_shot, inner_args, meta_train=False) logits = logits.flatten(0, 1) labels = y_query.flatten() pred = torch.argmax(logits, dim=-1) acc = 
utils.compute_acc(pred, labels) loss = F.cross_entropy(logits, labels) aves['vl'].update(loss.item(), 1) aves['va'].update(acc, 1) if lr_scheduler is not None: lr_scheduler.step() for k, avg in aves.items(): aves[k] = avg.item() trlog[k].append(aves[k]) t_epoch = utils.time_str(timer_epoch.end()) t_elapsed = utils.time_str(timer_elapsed.end()) t_estimate = utils.time_str(timer_elapsed.end() / (epoch - start_epoch + 1) * (config['epoch'] - start_epoch + 1)) # formats output log_str = 'epoch {}, meta-train {:.4f}|{:.4f}'.format( str(epoch), aves['tl'], aves['ta']) writer.add_scalars('loss', {'meta-train': aves['tl']}, epoch) writer.add_scalars('acc', {'meta-train': aves['ta']}, epoch) if eval_val: log_str += ', meta-val {:.4f}|{:.4f}'.format( aves['vl'], aves['va']) writer.add_scalars('loss', {'meta-val': aves['vl']}, epoch) writer.add_scalars('acc', {'meta-val': aves['va']}, epoch) wandb.log({ "train_loss": aves['tl'], "train_acc": aves['ta'], "val_loss": aves['vl'], "val_acc": aves['va'] }) log_str += ', {} {}/{}'.format(t_epoch, t_elapsed, t_estimate) utils.log(log_str) # saves model and meta-data if config.get('_parallel'): model_ = model.module else: model_ = model training = { 'epoch': epoch, 'max_va': max(max_va, aves['va']), 'optimizer': config['optimizer'], 'optimizer_args': config['optimizer_args'], 'optimizer_state_dict': optimizer.state_dict(), 'lr_scheduler_state_dict': lr_scheduler.state_dict() if lr_scheduler is not None else None, } ckpt = { 'file': __file__, 'config': config, 'encoder': config['encoder'], 'encoder_args': config['encoder_args'], 'encoder_state_dict': model_.encoder.state_dict(), 'classifier': config['classifier'], 'classifier_args': config['classifier_args'], 'classifier_state_dict': model_.classifier.state_dict(), 'training': training, } # 'epoch-last.pth': saved at the latest epoch # 'max-va.pth': saved when validation accuracy is at its maximum torch.save(ckpt, os.path.join(ckpt_path, 'epoch-last.pth')) torch.save(trlog, os.path.join(ckpt_path, 'trlog.pth')) if aves['va'] > max_va: max_va = aves['va'] torch.save(ckpt, os.path.join(ckpt_path, 'max-va.pth')) writer.flush()
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--config')
    parser.add_argument('--model')
    parser.add_argument('--gpu', default='0')
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    with open(args.config, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    spec = config['test_dataset']
    dataset = datasets.make(spec['dataset'])
    dataset = datasets.make(spec['wrapper'], args={'dataset': dataset})
    loader = DataLoader(dataset, batch_size=spec['batch_size'],
                        num_workers=8, pin_memory=True)

    model_spec = torch.load(args.model)['model']
    model = models.make(model_spec, load_sd=True).cuda()

    res = eval_psnr(loader, model,
                    data_norm=config.get('data_norm'),
                    eval_type=config.get('eval_type'),
                    eval_bsize=config.get('eval_bsize'),
                    verbose=True)
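# Invocation sketch (script name, paths and spec contents are placeholders,
# not actual files shipped with this repo):
#
#   python <test_script>.py --config <test_config>.yaml --model <checkpoint>.pth --gpu 0
#
# The YAML config is expected to provide at least:
#
#   test_dataset:
#     dataset: {name: ..., args: {...}}
#     wrapper: {name: ..., args: {...}}
#     batch_size: 16
#   data_norm: ...     # optional
#   eval_type: ...     # optional
#   eval_bsize: ...    # optional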
def main(config):
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    ckpt_name = args.name
    if ckpt_name is None:
        ckpt_name = config['encoder']
        ckpt_name += '_' + config['dataset'].replace('meta-', '')
        ckpt_name += '_{}_way_{}_shot'.format(
            config['train']['n_way'], config['train']['n_shot'])
    if args.tag is not None:
        ckpt_name += '_' + args.tag

    ckpt_path = os.path.join('./save', ckpt_name)
    utils.ensure_path(ckpt_path)
    utils.set_log_path(ckpt_path)
    writer = SummaryWriter(os.path.join(ckpt_path, 'tensorboard'))
    yaml.dump(config, open(os.path.join(ckpt_path, 'config.yaml'), 'w'))

    ##### Dataset #####

    # meta-train
    train_set = datasets.make(config['dataset'], **config['train'])
    utils.log('meta-train set: {} (x{}), {}'.format(
        train_set[0][0].shape, len(train_set), train_set.n_classes))
    train_loader = DataLoader(train_set, config['train']['n_episode'],
                              collate_fn=datasets.collate_fn,
                              num_workers=1, pin_memory=True)

    # meta-val
    eval_val = False
    if config.get('val'):
        eval_val = True
        val_set = datasets.make(config['dataset'], **config['val'])
        utils.log('meta-val set: {} (x{}), {}'.format(
            val_set[0][0].shape, len(val_set), val_set.n_classes))
        val_loader = DataLoader(val_set, config['val']['n_episode'],
                                collate_fn=datasets.collate_fn,
                                num_workers=1, pin_memory=True)

    ##### Model and Optimizer #####

    inner_args = utils.config_inner_args(config.get('inner_args'))

    if config.get('load'):
        ckpt = torch.load(config['load'])
        config['encoder'] = ckpt['encoder']
        config['encoder_args'] = ckpt['encoder_args']
        config['classifier'] = ckpt['classifier']
        config['classifier_args'] = ckpt['classifier_args']
        model = models.load(ckpt,
                            load_clf=(not inner_args['reset_classifier']))
        optimizer, lr_scheduler = optimizers.load(ckpt, model.parameters())
        start_epoch = ckpt['training']['epoch'] + 1
        max_va = ckpt['training']['max_va']
    else:
        config['encoder_args'] = config.get('encoder_args') or dict()
        config['classifier_args'] = config.get('classifier_args') or dict()
        config['encoder_args']['bn_args']['n_episode'] = \
            config['train']['n_episode']
        config['classifier_args']['n_way'] = config['train']['n_way']
        model = models.make(config['encoder'], config['encoder_args'],
                            config['classifier'], config['classifier_args'])
        optimizer, lr_scheduler = optimizers.make(config['optimizer'],
                                                  model.parameters(),
                                                  **config['optimizer_args'])
        start_epoch = 1
        max_va = 0.
if args.efficient: model.go_efficient() if config.get('_parallel'): model = nn.DataParallel(model) utils.log('num params: {}'.format(utils.compute_n_params(model))) timer_elapsed, timer_epoch = utils.Timer(), utils.Timer() ##### Training and evaluation ##### # 'tl': meta-train loss # 'ta': meta-train accuracy # 'vl': meta-val loss # 'va': meta-val accuracy aves_keys = ['tl', 'ta', 'vl', 'va'] trlog = dict() for k in aves_keys: trlog[k] = [] for epoch in range(start_epoch, config['epoch'] + 1): timer_epoch.start() aves = {k: utils.AverageMeter() for k in aves_keys} # meta-train model.train() writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch) np.random.seed(epoch) for data in tqdm(train_loader, desc='meta-train', leave=False): x_shot, x_query, y_shot, y_query = data x_shot, y_shot = x_shot.cuda(), y_shot.cuda() x_query, y_query = x_query.cuda(), y_query.cuda() if inner_args['reset_classifier']: if config.get('_parallel'): model.module.reset_classifier() else: model.reset_classifier() logits = model(x_shot, x_query, y_shot, inner_args, meta_train=True) logits = logits.flatten(0, 1) labels = y_query.flatten() pred = torch.argmax(logits, dim=-1) acc = utils.compute_acc(pred, labels) loss = F.cross_entropy(logits, labels) aves['tl'].update(loss.item(), 1) aves['ta'].update(acc, 1) optimizer.zero_grad() loss.backward() for param in optimizer.param_groups[0]['params']: nn.utils.clip_grad_value_(param, 10) optimizer.step() # meta-val if eval_val: model.eval() np.random.seed(0) for data in tqdm(val_loader, desc='meta-val', leave=False): x_shot, x_query, y_shot, y_query = data x_shot, y_shot = x_shot.cuda(), y_shot.cuda() x_query, y_query = x_query.cuda(), y_query.cuda() if inner_args['reset_classifier']: if config.get('_parallel'): model.module.reset_classifier() else: model.reset_classifier() logits = model(x_shot, x_query, y_shot, inner_args, meta_train=False) logits = logits.flatten(0, 1) labels = y_query.flatten() pred = torch.argmax(logits, dim=-1) acc = utils.compute_acc(pred, labels) loss = F.cross_entropy(logits, labels) aves['vl'].update(loss.item(), 1) aves['va'].update(acc, 1) if lr_scheduler is not None: lr_scheduler.step() for k, avg in aves.items(): aves[k] = avg.item() trlog[k].append(aves[k]) t_epoch = utils.time_str(timer_epoch.end()) t_elapsed = utils.time_str(timer_elapsed.end()) t_estimate = utils.time_str(timer_elapsed.end() / (epoch - start_epoch + 1) * (config['epoch'] - start_epoch + 1)) # formats output log_str = 'epoch {}, meta-train {:.4f}|{:.4f}'.format( str(epoch), aves['tl'], aves['ta']) writer.add_scalars('loss', {'meta-train': aves['tl']}, epoch) writer.add_scalars('acc', {'meta-train': aves['ta']}, epoch) if eval_val: log_str += ', meta-val {:.4f}|{:.4f}'.format( aves['vl'], aves['va']) writer.add_scalars('loss', {'meta-val': aves['vl']}, epoch) writer.add_scalars('acc', {'meta-val': aves['va']}, epoch) log_str += ', {} {}/{}'.format(t_epoch, t_elapsed, t_estimate) utils.log(log_str) # saves model and meta-data if config.get('_parallel'): model_ = model.module else: model_ = model training = { 'epoch': epoch, 'max_va': max(max_va, aves['va']), 'optimizer': config['optimizer'], 'optimizer_args': config['optimizer_args'], 'optimizer_state_dict': optimizer.state_dict(), 'lr_scheduler_state_dict': lr_scheduler.state_dict() if lr_scheduler is not None else None, } ckpt = { 'file': __file__, 'config': config, 'encoder': config['encoder'], 'encoder_args': config['encoder_args'], 'encoder_state_dict': model_.encoder.state_dict(), 'classifier': 
            config['classifier'],
            'classifier_args': config['classifier_args'],
            'classifier_state_dict': model_.classifier.state_dict(),
            'training': training,
        }

        # 'epoch-last.pth': saved at the latest epoch
        # 'max-va.pth': saved when validation accuracy is at its maximum
        torch.save(ckpt, os.path.join(ckpt_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(ckpt_path, 'trlog.pth'))

        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(ckpt, os.path.join(ckpt_path, 'max-va.pth'))

        writer.flush()
def main(config): svname = args.name if svname is None: svname = 'moco_{}'.format(config['train_dataset']) svname += '_' + config['model_args']['encoder'] out_dim = config['model_args']['encoder_args']['out_dim'] svname += '-out_dim' + str(out_dim) svname += '-seed' + str(args.seed) if args.tag is not None: svname += '_' + args.tag save_path = os.path.join(args.save_dir, svname) utils.ensure_path(save_path, remove=False) utils.set_log_path(save_path) writer = SummaryWriter(os.path.join(save_path, 'tensorboard')) yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w')) random_state = np.random.RandomState(args.seed) print('seed:', args.seed) logger = utils.Logger(file_name=os.path.join(save_path, "log_sdout.txt"), file_mode="a+", should_flush=True) #### Dataset #### # train train_dataset = datasets.make(config['train_dataset'], **config['train_dataset_args']) train_loader = DataLoader(train_dataset, config['batch_size'], shuffle=True, num_workers=8, pin_memory=True, drop_last=True) utils.log('train dataset: {} (x{})'.format(train_dataset[0][0][0].shape, len(train_dataset))) if config.get('visualize_datasets'): utils.visualize_dataset(train_dataset, 'train_dataset', writer) # val if config.get('val_dataset'): eval_val = True val_dataset = datasets.make(config['val_dataset'], **config['val_dataset_args']) val_loader = DataLoader(val_dataset, config['batch_size'], num_workers=8, pin_memory=True, drop_last=True) utils.log('val dataset: {} (x{})'.format(val_dataset[0][0][0].shape, len(val_dataset))) if config.get('visualize_datasets'): utils.visualize_dataset(val_dataset, 'val_dataset', writer) else: eval_val = False # few-shot eval if config.get('eval_fs'): ef_epoch = config.get('eval_fs_epoch') if ef_epoch is None: ef_epoch = 5 eval_fs = True n_way = 2 n_query = 1 n_shot = 6 if config.get('ep_per_batch') is not None: ep_per_batch = config['ep_per_batch'] else: ep_per_batch = 1 # tvals fs_loaders = {} tval_name_ntasks_dict = { 'tval': 2000, 'tval_ff': 600, 'tval_bd': 480, 'tval_hd_comb': 400, 'tval_hd_novel': 320 } # numbers depend on dataset for tval_type in tval_name_ntasks_dict.keys(): if config.get('{}_dataset'.format(tval_type)): tval_dataset = datasets.make( config['{}_dataset'.format(tval_type)], **config['{}_dataset_args'.format(tval_type)]) utils.log('{} dataset: {} (x{})'.format( tval_type, tval_dataset[0][0][0].shape, len(tval_dataset))) if config.get('visualize_datasets'): utils.visualize_dataset(tval_dataset, 'tval_ff_dataset', writer) tval_sampler = BongardSampler( tval_dataset.n_tasks, n_batch=tval_name_ntasks_dict[tval_type] // ep_per_batch, ep_per_batch=ep_per_batch, seed=random_state.randint(2**31)) tval_loader = DataLoader(tval_dataset, batch_sampler=tval_sampler, num_workers=8, pin_memory=True) fs_loaders.update({tval_type: tval_loader}) else: fs_loaders.update({tval_type: None}) else: eval_fs = False ######## #### Model and Optimizer #### if config.get('load'): model_sv = torch.load(config['load']) model = models.load(model_sv) else: model = models.make(config['model'], **config['model_args']) if eval_fs: fs_model = models.make('meta-baseline', encoder=None) fs_model.encoder = model.encoder if config.get('_parallel'): model = nn.DataParallel(model) if eval_fs: fs_model = nn.DataParallel(fs_model) utils.log('num params: {}'.format(utils.compute_n_params(model))) optimizer, lr_scheduler = utils.make_optimizer(model.parameters(), config['optimizer'], **config['optimizer_args']) ######## max_epoch = config['max_epoch'] save_epoch = config.get('save_epoch') max_va = 
0. timer_used = utils.Timer() timer_epoch = utils.Timer() for epoch in range(1, max_epoch + 1 + 1): timer_epoch.s() aves_keys = ['tl', 'ta', 'vl', 'va', 'tvl', 'tva'] if eval_fs: for k, v in fs_loaders.items(): if v is not None: aves_keys += ['fsa' + k.split('tval')[-1]] aves = {ave_k: utils.Averager() for ave_k in aves_keys} # train model.train() writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch) for data, _ in tqdm(train_loader, desc='train', leave=False): logits, label = model(im_q=data[0].cuda(), im_k=data[1].cuda()) loss = F.cross_entropy(logits, label) acc = utils.compute_acc(logits, label) optimizer.zero_grad() loss.backward() optimizer.step() aves['tl'].add(loss.item()) aves['ta'].add(acc) logits = None loss = None # val if eval_val: model.eval() for data, _ in tqdm(val_loader, desc='val', leave=False): with torch.no_grad(): logits, label = model(im_q=data[0].cuda(), im_k=data[1].cuda()) loss = F.cross_entropy(logits, label) acc = utils.compute_acc(logits, label) aves['vl'].add(loss.item()) aves['va'].add(acc) if eval_fs and (epoch % ef_epoch == 0 or epoch == max_epoch + 1): fs_model.eval() for k, v in fs_loaders.items(): if v is not None: ave_key = 'fsa' + k.split('tval')[-1] np.random.seed(0) for data, _ in tqdm(v, desc=ave_key, leave=False): x_shot, x_query = fs.split_shot_query( data[0].cuda(), n_way, n_shot, n_query, ep_per_batch=ep_per_batch) label_query = fs.make_nk_label( n_way, n_query, ep_per_batch=ep_per_batch).cuda() with torch.no_grad(): logits = fs_model(x_shot, x_query).view(-1, n_way) acc = utils.compute_acc(logits, label_query) aves[ave_key].add(acc) # post if lr_scheduler is not None: lr_scheduler.step() for k, v in aves.items(): aves[k] = v.item() t_epoch = utils.time_str(timer_epoch.t()) t_used = utils.time_str(timer_used.t()) t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch) if epoch <= max_epoch: epoch_str = str(epoch) else: epoch_str = 'ex' log_str = 'epoch {}, train {:.4f}|{:.4f}'.format( epoch_str, aves['tl'], aves['ta']) writer.add_scalars('loss', {'train': aves['tl']}, epoch) writer.add_scalars('acc', {'train': aves['ta']}, epoch) if eval_val: log_str += ', val {:.4f}|{:.4f}, tval {:.4f}|{:.4f}'.format( aves['vl'], aves['va'], aves['tvl'], aves['tva']) writer.add_scalars('loss', {'val': aves['vl']}, epoch) writer.add_scalars('loss', {'tval': aves['tvl']}, epoch) writer.add_scalars('acc', {'val': aves['va']}, epoch) writer.add_scalars('acc', {'tval': aves['tva']}, epoch) if eval_fs and (epoch % ef_epoch == 0 or epoch == max_epoch + 1): log_str += ', fs' for ave_key in aves_keys: if 'fsa' in ave_key: log_str += ' {}: {:.4f}'.format(ave_key, aves[ave_key]) writer.add_scalars('acc', {ave_key: aves[ave_key]}, epoch) if epoch <= max_epoch: log_str += ', {} {}/{}'.format(t_epoch, t_used, t_estimate) else: log_str += ', {}'.format(t_epoch) utils.log(log_str) if config.get('_parallel'): model_ = model.module else: model_ = model training = { 'epoch': epoch, 'optimizer': config['optimizer'], 'optimizer_args': config['optimizer_args'], 'optimizer_sd': optimizer.state_dict(), } save_obj = { 'file': __file__, 'config': config, 'model': config['model'], 'model_args': config['model_args'], 'model_sd': model_.state_dict(), 'training': training, } if epoch <= max_epoch: torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth')) if (save_epoch is not None) and epoch % save_epoch == 0: torch.save( save_obj, os.path.join(save_path, 'epoch-{}.pth'.format(epoch))) if aves['va'] > max_va: max_va = aves['va'] torch.save(save_obj, 
                           os.path.join(save_path, 'max-va.pth'))
        else:
            torch.save(save_obj, os.path.join(save_path, 'epoch-ex.pth'))

        writer.flush()

    print('finished training!')
    logger.close()