def training_step(batch):
    nonlocal batches_accumulated
    if batches_accumulated == 0:
        optimizers.zero_grad()
    output_dict = model(batch['input'], batch['instance_target'])
    output_dict['classifier_target'] = batch['classifier_target']
    loss_dict = criterion(output_dict)
    top1, top5 = utils.accuracy(output_dict['classifier_output'].data,
                                output_dict['classifier_target'].data, topk=(1, 5))
    loss_dict['loss'].backward()  # gradients accumulate across micro-batches
    batches_accumulated += 1
    if batches_accumulated == args.gradient_accum:
        mag = {}  # per-parameter gradient norms, kept for monitoring/debugging
        for name, p in model.named_parameters():
            mag[name] = p.grad.norm().item()
        optimizers.step()  # one optimizer step every args.gradient_accum batches
        batches_accumulated = 0

    return_dict = OD()
    for key in ['cross_entropy', 'margin', 'loss']:
        if key in loss_dict:
            return_dict[key] = loss_dict[key].item()
    return_dict['beta'] = beta.item()
    return_dict['top1'] = top1
    return_dict['top5'] = top5
    return return_dict
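# A minimal, self-contained sketch of the gradient-accumulation pattern used in
# training_step above: gradients from several micro-batches are summed before a
# single optimizer step. The model, data and accum_steps below are hypothetical
# placeholders, not the ones used in this codebase.
import torch

def accumulation_demo(accum_steps=4):
    model = torch.nn.Linear(8, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    criterion = torch.nn.CrossEntropyLoss()
    batches_accumulated = 0
    for _ in range(2 * accum_steps):  # toy stream of micro-batches
        inputs = torch.randn(16, 8)
        targets = torch.randint(0, 2, (16,))
        if batches_accumulated == 0:
            optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()  # .grad buffers accumulate across calls
        batches_accumulated += 1
        if batches_accumulated == accum_steps:
            optimizer.step()  # one update per accum_steps micro-batches
            batches_accumulated = 0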
def validation_step(batch):
    with torch.no_grad():
        output_dict = model(batch['input'])
        target = batch['classifier_target']
        xloss = criterion(output_dict['classifier_output'], target)
        top1, top5 = utils.accuracy(output_dict['classifier_output'], target, topk=(1, 5))
        return OD([
            ('cross_entropy', xloss.item()),
            ('top1', top1),
            ('top5', top5),
        ])
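# For reference, a minimal top-k accuracy helper equivalent in spirit to the
# utils.accuracy calls above; the repository's own implementation may differ.
# It returns, for each k, the percentage of samples whose target class appears
# among the k highest-scoring predictions.
import torch

def topk_accuracy(output, target, topk=(1, 5)):
    maxk = max(topk)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)  # (N, maxk) indices
    correct = pred.eq(target.view(-1, 1))                          # (N, maxk) booleans
    return [100.0 * correct[:, :k].float().sum().item() / target.size(0) for k in topk]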
def training_step(batch):
    optimizers.zero_grad()
    output_dict = model(batch['input'])
    loss = criterion(output_dict['classifier_output'], batch['classifier_target'])
    top1, top5 = utils.accuracy(output_dict['classifier_output'].data,
                                batch['classifier_target'].data, topk=(1, 5))
    # partial backward: only the pooling exponent p receives a gradient
    p.grad = torch.autograd.grad(loss, p)[0]
    optimizers.step()
    return OD([
        ('cross_entropy', loss.item()),
        ('p', p.item()),
        ('top1', top1),
        ('top5', top5),
    ])
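# A small sketch of the "partial backward" idea above: torch.autograd.grad
# computes the gradient of the loss with respect to a single tensor without
# populating .grad on any other parameter. All names below are illustrative
# placeholders (p stands in for the pooling exponent).
import torch

x = torch.randn(4, 3)
p = torch.tensor(3.0, requires_grad=True)   # lone parameter being tuned
w = torch.randn(3, requires_grad=True)      # other parameters stay untouched
loss = (x.abs() ** p).mean() + (x @ w).mean()
p.grad = torch.autograd.grad(loss, p)[0]    # gradient w.r.t. p only
assert w.grad is None                       # no gradient was written for w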
def run(args):
    argstr = yaml.dump(args.__dict__, default_flow_style=False)
    print('arguments:')
    print(argstr)
    argfile = osp.join(args.expdir, 'evaluate_args.yaml')

    args.cuda = not args.no_cuda

    if not args.dry:
        utils.ifmakedirs(args.expdir)
        logging.print_file(argstr, argfile)

    collate_fn = dict(collate_fn=list_collate) if args.input_crop == 'rect' else {}

    transforms = get_transforms(input_size=args.input_size,
                                crop=(args.input_crop == 'square'),
                                need=('val',))

    if args.dataset.startswith('imagenet'):
        dataset = IdDataset(
            IN1K(args.imagenet_path,
                 args.dataset[len('imagenet-'):],
                 transform=transforms['val']))
        mode = "classification"
    else:
        raise NotImplementedError

    loader = DataLoader(dataset, batch_size=args.batch_size,
                        num_workers=args.workers,
                        shuffle=args.shuffle, pin_memory=True, **collate_fn)

    model = get_multigrain(args.backbone, include_sampling=False,
                           pretrained_backbone=args.pretrained_backbone)
    p = model.pool.p

    checkpoints = utils.CheckpointHandler(args.expdir)

    if checkpoints.exists(args.resume_epoch, args.resume_from):
        epoch = checkpoints.resume(model, resume_epoch=args.resume_epoch,
                                   resume_from=args.resume_from,
                                   return_extra=False)
    else:
        raise ValueError('Checkpoint ' + args.resume_from + ' not found')

    if args.pooling_exponent is not None:  # overwrite stored pooling exponent
        p.data.fill_(args.pooling_exponent)

    print("Multigrain model with {} backbone and p={} pooling:".format(
        args.backbone, p.item()))
    print(model)

    if args.cuda:
        model = utils.cuda(model)

    model.eval()  # freeze batch normalization

    print("Evaluating", args.dataset)

    metrics_history = OD()
    metrics = defaultdict(utils.HistoryMeter)
    embeddings = []
    index = None

    tic()
    for i, batch in enumerate(loader):
        with torch.no_grad():
            if args.cuda:
                batch = utils.cuda(batch)
            metrics["data_time"].update(1000 * toc())
            tic()
            output_dict = model(batch['input'])
            if mode == "classification":
                target = batch['classifier_target']
                top1, top5 = utils.accuracy(output_dict['classifier_output'],
                                            target, topk=(1, 5))
                metrics["val_top1"].update(top1)
                metrics["val_top5"].update(top5)
            elif mode == "retrieval":
                descriptors = output_dict['normalized_embedding']
                if index is None:
                    index = faiss.IndexFlatL2(descriptors.size(1))
                index.add(descriptors.cpu().numpy())  # faiss expects a 2D float32 array
            metrics["batch_time"].update(1000 * toc())
            tic()
        print(logging.str_metrics(metrics, iter=i, num_iters=len(loader),
                                  epoch=epoch, num_epochs=epoch))
    print(logging.str_metrics(metrics, epoch=epoch, num_epochs=1))

    for k in metrics:
        metrics[k] = metrics[k].avg

    toc()
    metrics_history[epoch] = metrics
    checkpoints.save_metrics(metrics_history)
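# A minimal sketch of how a faiss.IndexFlatL2 index is typically filled and
# queried, matching the retrieval branch in run() above; the dimensions and
# random data here are purely illustrative.
import faiss
import numpy as np

d = 128                                               # descriptor dimension
index = faiss.IndexFlatL2(d)
database = np.random.rand(1000, d).astype('float32')  # faiss expects 2D float32 arrays
index.add(database)
queries = np.random.rand(5, d).astype('float32')
distances, neighbors = index.search(queries, 10)      # 10 nearest neighbors per query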