model.train()
requires_grad_(model, True)
accs = AverageMeter()
losses = AverageMeter()
attack_norms = AverageMeter()

scheduler.step()

length = len(train_loader)
for i, (images, labels) in enumerate(tqdm.tqdm(train_loader, ncols=80)):
    images, labels = images.to(DEVICE), labels.to(DEVICE)

    if args.adv is not None and epoch >= args.adv:
        # adversarial training: freeze the model, record clean accuracy,
        # then craft adversarial examples for this batch
        model.eval()
        requires_grad_(model, False)
        with torch.no_grad():
            accs.append((model(images).argmax(1) == labels).float().mean().item())
        adv = attacker.attack(model, images, labels)
        l2_norms = (adv - images).view(args.batch_size, -1).norm(2, 1)
        mean_norm = l2_norms.mean()
        if args.max_norm:
            # project the perturbations back onto the L2 ball of radius max_norm
            adv = torch.renorm(adv - images, p=2, dim=0,
                               maxnorm=args.max_norm) + images
        attack_norms.append(mean_norm.item())
        # unfreeze the model and train on the adversarial batch
        requires_grad_(model, True)
        model.train()
        logits = model(adv.detach())
    else:
        logits = model(images)
        accs.append((logits.argmax(1) == labels).float().mean().item())

    loss = F.cross_entropy(logits, labels)
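The `requires_grad_` helper called in this loop is not shown in the excerpt. A minimal sketch of what it presumably does, toggling gradient tracking on every parameter of a module so the network can be frozen while the attack runs, is given below; the name and signature are taken from the calls above, the body is an assumption.

import torch.nn as nn

def requires_grad_(module: nn.Module, flag: bool = True) -> None:
    # Toggle gradient tracking for all parameters of the module.
    # Used to freeze the model while crafting adversarial examples
    # and to unfreeze it again before the weight update.
    for param in module.parameters():
        param.requires_grad_(flag)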
def main(epochs, seed):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    cudnn.deterministic = True
    torch.manual_seed(seed)

    train_loader, valid_loader, test_loader = load_dataset()

    model = load_model()
    model = torch.nn.DataParallel(model)
    model.to(device)

    optimizer, scheduler = get_optimizer_scheduler(parameters=model.parameters())

    train_losses = AverageMeter()
    train_accs = AverageMeter()
    best_valid_acc = 0
    best_valid_loss = float('inf')
    best_model_dict = deepcopy(model.module.state_dict())

    for epoch in range(epochs):
        model.train()
        train_losses.reset()
        train_accs.reset()
        loader_length = len(train_loader)
        pbar = tqdm(train_loader, ncols=80, desc='Training')
        start = time.time()
        for i, (images, labels) in enumerate(pbar):
            images, labels = images.to(device), labels.to(device, non_blocking=True)
            logits = model(images)
            predictions = model.module.pooling.predictions(logits=logits)
            loss = model.module.pooling.loss(logits=logits, labels=labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = (predictions == labels).float().mean().item()
            loss = loss.item()
            step = epoch + i / loader_length
            ex.log_scalar('training.loss', loss, step)
            ex.log_scalar('training.acc', acc, step)
            train_losses.append(loss)
            train_accs.append(acc)
        scheduler.step()
        end = time.time()
        duration = end - start

        # evaluate on validation set
        valid_metrics = validation(model=model, loader=valid_loader, device=device)
        if valid_metrics['losses'].mean() <= best_valid_loss:
            best_valid_acc = valid_metrics['accuracy']
            best_valid_loss = valid_metrics['losses'].mean()
            best_model_dict = deepcopy(model.module.state_dict())
        ex.log_scalar('validation.loss', valid_metrics['losses'].mean(), epoch + 1)
        ex.log_scalar('validation.acc', valid_metrics['accuracy'], epoch + 1)

        print('Epoch {:02d} | Duration: {:.1f}s - per batch ({}): {:.3f}s'.format(
            epoch, duration, loader_length, duration / loader_length))
        print(' ' * 8, '| Train loss: {:.4f} acc: {:.3f}'.format(
            train_losses.avg, train_accs.avg))
        print(' ' * 8, '| Valid loss: {:.4f} acc: {:.3f}'.format(
            valid_metrics['losses'].mean(), valid_metrics['accuracy']))

    # load best model based on validation loss
    model = load_model()
    model.load_state_dict(best_model_dict)
    model.to(device)

    # evaluate on test set
    test_metrics = test(model=model, loader=test_loader, device=device)
    ex.log_scalar('test.loss', test_metrics['losses'].mean(), epochs)
    ex.log_scalar('test.acc', test_metrics['accuracy'], epochs)

    # save model
    save_name = get_save_name() + '.pickle'
    torch.save(state_dict_to_cpu(best_model_dict), save_name)
    ex.add_artifact(os.path.abspath(save_name))

    # save test metrics
    if len(ex.current_run.observers) > 0:
        dataset = ex.current_run.config['dataset']['name']
        pooling = ex.current_run.config['model']['pooling']
        split = ex.current_run.config['dataset']['split']
        fold = ex.current_run.config['dataset']['fold']
        torch.save(
            test_metrics,
            os.path.join(ex.current_run.observers[0].dir,
                         '{}_{}_split-{}_fold-{}.pkl'.format(dataset, pooling, split, fold)))

    # metrics to info.json
    info_to_save = [
        'labels', 'logits', 'probabilities', 'predictions', 'losses',
        'accuracy', 'AP', 'confusion_matrix', 'dice', 'dice_per_image',
        'mean_dice', 'iou', 'iou_per_image', 'mean_iou'
    ]
    for k in info_to_save:
        ex.info[k] = test_metrics[k]

    return test_metrics['mean_dice']
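The checkpoint above is saved through `state_dict_to_cpu`, which is not defined in this excerpt. A plausible sketch, assuming it simply detaches every tensor and moves it to the CPU so the pickle can be loaded on any machine, is:

from collections import OrderedDict

def state_dict_to_cpu(state_dict):
    # Return a copy of the state_dict with every tensor detached and on CPU,
    # preserving key order, so the checkpoint does not require a GPU to load.
    return OrderedDict((k, v.detach().cpu()) for k, v in state_dict.items())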
accs = AverageMeter()
losses = AverageMeter()
attack_norms = AverageMeter()

length = len(train_loader)
for i, (images, labels) in enumerate(tqdm.tqdm(train_loader, ncols=80)):
    images, labels = images.to(DEVICE), labels.to(DEVICE)

    logits = model(images)
    loss = F.cross_entropy(logits, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    accs.append((logits.argmax(1) == labels).float().mean().item())
    losses.append(loss.item())

print('Epoch {} | Training | Loss: {:.4f}, Accs: {:.4f}'.format(
    epoch, losses.avg, accs.avg))

# switch to evaluation mode for the validation pass
cudnn.benchmark = False
model.eval()
requires_grad_(model, False)

val_accs = AverageMeter()
val_losses = AverageMeter()
with torch.no_grad():
    for i, (images, labels) in enumerate(tqdm.tqdm(val_loader, ncols=80)):
        images, labels = images.to(DEVICE), labels.to(DEVICE)
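Every loop in this section accumulates scalars in an `AverageMeter` with `append`, `reset`, and `avg`; its definition is not included here. A minimal sketch consistent with how it is used, a running mean of appended values, might look like this:

class AverageMeter:
    # Tracks the running average of scalar values appended during an epoch.
    def __init__(self):
        self.reset()

    def reset(self):
        self.sum = 0.0
        self.count = 0

    def append(self, value):
        self.sum += value
        self.count += 1

    @property
    def avg(self):
        return self.sum / max(self.count, 1)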
def main(epochs, seed):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    cudnn.deterministic = True
    torch.manual_seed(seed)

    train_loader, valid_loader, test_loader = load_dataset()

    model = load_model()
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    model.to(device)

    optimizer, scheduler = get_optimizer_scheduler(parameters=model.parameters())

    train_losses = AverageMeter()
    train_accs = AverageMeter()
    best_valid_acc = 0
    best_valid_loss = float('inf')
    best_model_dict = deepcopy(model.state_dict())

    for epoch in range(epochs):
        model.train()
        train_losses.reset()
        train_accs.reset()
        loader_length = len(train_loader)
        pbar = tqdm(train_loader, ncols=80, desc='Training')
        start = time.time()
        for i, (images, labels) in enumerate(pbar):
            images, labels = images.to(device), labels.to(device, non_blocking=True)
            logits = model(images)
            predictions = model.pooling.predictions(logits=logits)
            loss = model.pooling.loss(logits=logits, labels=labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = (predictions == labels).float().mean().item()
            loss = loss.item()
            step = epoch + i / loader_length
            ex.log_scalar('training.loss', loss, step)
            ex.log_scalar('training.acc', acc, step)
            train_losses.append(loss)
            train_accs.append(acc)
        scheduler.step()
        end = time.time()
        duration = end - start

        # evaluate on validation set
        valid_metrics = evaluate(model=model, loader=valid_loader, device=device)
        if valid_metrics['loss'] <= best_valid_loss:
            best_valid_acc = valid_metrics['accuracy']
            best_valid_loss = valid_metrics['loss']
            best_model_dict = deepcopy(model.state_dict())
        ex.log_scalar('validation.loss', valid_metrics['loss'], epoch + 1)
        ex.log_scalar('validation.acc', valid_metrics['accuracy'], epoch + 1)

        print('Epoch {:02d} | Duration: {:.1f}s - per batch ({}): {:.3f}s'.format(
            epoch, duration, loader_length, duration / loader_length))
        print(' ' * 8, '| Train\tloss: {:.4f}\tacc: {:.3f}'.format(
            train_losses.avg, train_accs.avg))
        print(' ' * 8, '| Valid\tloss: {:.4f}\tacc: {:.3f}'.format(
            valid_metrics['loss'], valid_metrics['accuracy']))

    # load best model based on validation loss
    model.load_state_dict(best_model_dict)

    # evaluate on test set
    test_metrics = evaluate(model=model, loader=test_loader, device=device)
    ex.log_scalar('test.loss', test_metrics['loss'], epochs)
    ex.log_scalar('test.acc', test_metrics['accuracy'], epochs)

    # save model
    save_name = get_save_name() + '.pickle'
    torch.save(state_dict_to_cpu(best_model_dict), save_name)
    ex.add_artifact(os.path.abspath(save_name))

    # metrics to info.json
    for k, v in test_metrics.items():
        ex.info[k] = v

    return test_metrics['accuracy']
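The `evaluate` function this script calls is not part of the excerpt; judging from the keys accessed above, it returns at least 'loss' and 'accuracy'. A minimal sketch under that assumption follows, using plain cross-entropy, although the real function presumably goes through the model's pooling module the way the training loop does.

import torch
import torch.nn.functional as F

@torch.no_grad()
def evaluate(model, loader, device):
    # Sketch: average cross-entropy loss and accuracy over a loader.
    # The actual evaluate likely computes additional metrics.
    model.eval()
    total_loss, correct, count = 0.0, 0, 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        logits = model(images)
        total_loss += F.cross_entropy(logits, labels, reduction='sum').item()
        correct += (logits.argmax(1) == labels).sum().item()
        count += labels.size(0)
    return {'loss': total_loss / count, 'accuracy': correct / count}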
def main(epochs, seed):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    cudnn.deterministic = True
    torch.manual_seed(seed)

    train_loader, valid_loader, test_loader = load_dataset()

    model = load_unet()
    model = torch.nn.DataParallel(model)
    model.to(device)

    optimizer, scheduler = get_optimizer_scheduler(parameters=model.parameters())

    train_losses = AverageMeter()
    batch_evaluator = Evaluator(2)
    train_dices_background = AverageMeter()
    train_dices = AverageMeter()
    best_valid_dice = 0
    best_valid_loss = float('inf')
    best_model_dict = deepcopy(model.module.state_dict())

    for epoch in range(epochs):
        model.train()
        train_losses.reset()
        train_dices_background.reset()
        train_dices.reset()
        loader_length = len(train_loader)
        pbar = tqdm(train_loader, ncols=80, desc='Training')
        start = time.time()
        for i, (images, mask, label) in enumerate(pbar):
            images, mask = images.to(device), mask.squeeze(1).to(device, non_blocking=True)
            seg_logits = model(images)
            if ex.current_run.config['dataset']['name'] == 'caltech_birds':
                class_mask = (mask > 0.5).long()
            else:
                class_mask = (mask != 0).long()
            loss = F.cross_entropy(seg_logits, class_mask)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_evaluator.add_batch(class_mask, seg_logits.argmax(1))
            dices = batch_evaluator.dice()
            dice_background = dices[0].item()
            dice = dices[1].item()
            batch_evaluator.reset()

            loss = loss.item()
            step = epoch + i / loader_length
            ex.log_scalar('training.loss', loss, step)
            ex.log_scalar('training.mean_dice_background', dice_background, step)
            ex.log_scalar('training.mean_dice', dice, step)
            train_losses.append(loss)
            train_dices_background.append(dice_background)
            train_dices.append(dice)
        scheduler.step()
        duration = time.time() - start

        # evaluate on validation set
        valid_metrics = evaluate(model=model, loader=valid_loader, device=device)
        if valid_metrics['losses'].mean() <= best_valid_loss:
            best_valid_dice = valid_metrics['mean_dice']
            best_valid_loss = valid_metrics['losses'].mean()
            best_model_dict = deepcopy(model.module.state_dict())
        ex.log_scalar('validation.loss', np.mean(valid_metrics['losses']), epoch + 1)
        ex.log_scalar('validation.mean_dice', valid_metrics['mean_dice'], epoch + 1)

        print('Epoch {:02d} | Duration: {:.1f}s - per batch ({}): {:.3f}s'.format(
            epoch, duration, loader_length, duration / loader_length))
        print(' ' * 8, '| Train loss: {:.4f} dice(b): {:.3f} dice: {:.3f}'.format(
            train_losses.avg, train_dices_background.avg, train_dices.avg))
        print(' ' * 8, '| Valid loss: {:.4f} dice(b): {:.3f} dice: {:.3f}'.format(
            valid_metrics['losses'].mean(), valid_metrics['mean_dice_background'],
            valid_metrics['mean_dice']))

    # load best model based on validation loss
    model = load_unet()
    model.load_state_dict(best_model_dict)
    model.to(device)

    # evaluate on test set
    test_metrics = evaluate(model=model, loader=test_loader, device=device, test=True)
    ex.log_scalar('test.loss', test_metrics['losses'].mean(), epochs)
    ex.log_scalar('test.mean_dice_background', test_metrics['mean_dice_background'], epochs)
    ex.log_scalar('test.mean_dice', test_metrics['mean_dice'], epochs)

    # save model
    save_name = get_save_name() + '.pickle'
    torch.save(state_dict_to_cpu(best_model_dict), save_name)
    ex.add_artifact(os.path.abspath(save_name))

    # save test metrics
    if len(ex.current_run.observers) > 0:
        dataset = ex.current_run.config['dataset']['name']
        split = ex.current_run.config['dataset']['split']
        fold = ex.current_run.config['dataset']['fold']
        torch.save(
            test_metrics,
            os.path.join(ex.current_run.observers[0].dir,
                         '{}_unet_split-{}_fold-{}.pkl'.format(dataset, split, fold)))

    # metrics to info.json
    info_to_save = [
        'labels', 'losses', 'dice_per_image', 'mean_dice', 'dice',
        'iou_per_image', 'mean_iou', 'iou'
    ]
    for k in info_to_save:
        ex.info[k] = test_metrics[k]

    return test_metrics['mean_dice']
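The `Evaluator(2)` that produces the per-class Dice scores above is also external to this excerpt. A minimal confusion-matrix based sketch, assuming `add_batch(target, prediction)` accumulates counts and `dice()` returns one score per class with index 0 being the background, could be:

import torch

class Evaluator:
    # Sketch: accumulates a confusion matrix and derives per-class Dice scores.
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.reset()

    def reset(self):
        self.confusion = torch.zeros(self.num_classes, self.num_classes, dtype=torch.long)

    def add_batch(self, target, prediction):
        # Bin (target, prediction) pairs into the confusion matrix.
        idx = target.flatten() * self.num_classes + prediction.flatten()
        counts = torch.bincount(idx.cpu(), minlength=self.num_classes ** 2)
        self.confusion += counts.reshape(self.num_classes, self.num_classes)

    def dice(self):
        # Dice per class: 2*TP / (2*TP + FP + FN), clamped to avoid division by zero.
        tp = self.confusion.diag().float()
        fp = self.confusion.sum(0).float() - tp
        fn = self.confusion.sum(1).float() - tp
        return 2 * tp / (2 * tp + fp + fn).clamp(min=1)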