def main():
    """Prepare the self-labeling run: config, model, loss, optimizer, train loader.

    Reads the module-level ``args`` for the two config paths. Nothing is
    returned; the body ends once the training dataloader has been built.
    """
    cfg = create_config(args.config_env, args.config_exp)
    print(colored(cfg, 'red'))

    # Network: print the bare module first, then wrap it in DataParallel
    # and move it to CUDA.
    print(colored('Retrieve model', 'blue'))
    net = get_model(cfg, cfg['scan_model'])
    print(net)
    net = torch.nn.DataParallel(net).cuda()

    # Loss
    print(colored('Get loss', 'blue'))
    loss_fn = get_criterion(cfg)
    loss_fn.cuda()
    print(loss_fn)

    # cuDNN autotuner
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Optimizer
    print(colored('Retrieve optimizer', 'blue'))
    opt = get_optimizer(cfg, net)
    print(opt)

    # Data: the train set receives both transform pipelines, keyed by role.
    print(colored('Retrieve dataset', 'blue'))
    augment_tf = get_train_transformations(cfg)
    standard_tf = get_val_transformations(cfg)
    transform_pair = {'standard': standard_tf, 'augment': augment_tf}
    train_set = get_train_dataset(cfg, transform_pair, split='train',
                                  to_augmented_dataset=True)
    train_dataloader = get_train_dataloader(cfg, train_set)
def main():
    """Convert the official MoCo v2 checkpoint into a pretext model and mine neighbors.

    Steps, in order:
      1. Build the config, the model (wrapped in ``DataParallel``), the
         train/val datasets with validation transforms, and one memory bank
         per split.
      2. Download the MoCo v2 (800 epochs) checkpoint with ``wget``, remap
         its ``module.encoder_q.*`` keys to ``module.backbone.*`` /
         ``module.contrastive_head.*`` and load them into the model.
      3. Save the model weights to ``p['pretext_model']``.
      4. Replace the contrastive head with ``Identity`` and mine the top-50
         (train) and top-5 (val) nearest neighbors, saving the indices to
         ``p['topk_neighbors_train_path']`` / ``p['topk_neighbors_val_path']``.

    Raises:
        subprocess.CalledProcessError: if the ``wget`` download fails.
        ValueError: if the checkpoint contains a key outside
            ``module.encoder_q``.
    """
    import subprocess  # local import: only this entry point spawns a process

    moco_url = ('https://dl.fbaipublicfiles.com/moco/moco_checkpoints/'
                'moco_v2_800ep/moco_v2_800ep_pretrain.pth.tar')
    moco_file = 'moco_v2_800ep_pretrain.pth.tar'

    # Retrieve config file
    p = create_config(args)
    print(colored(p, 'red'))

    # Model
    print(colored('Retrieve model', 'blue'))
    model = get_model(p)
    print('Model is {}'.format(model.__class__.__name__))
    print(model)
    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Dataset (both splits use the validation transforms and val dataloader)
    print(colored('Retrieve dataset', 'blue'))
    transforms = get_val_transformations(p)
    train_dataset = get_train_dataset(p, transforms)
    val_dataset = get_val_dataset(p, transforms)
    train_dataloader = get_val_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Dataset contains {}/{} train/val samples'.format(
        len(train_dataset), len(val_dataset)))

    # Memory Bank
    print(colored('Build MemoryBank', 'blue'))
    memory_bank_train = MemoryBank(len(train_dataset), 2048, p['num_classes'],
                                   p['temperature'])
    memory_bank_train.cuda()
    memory_bank_val = MemoryBank(len(val_dataset), 2048, p['num_classes'],
                                 p['temperature'])
    memory_bank_val.cuda()

    # Load the official MoCoV2 checkpoint.
    # check=True surfaces a failed download immediately instead of letting it
    # show up later as a confusing torch.load error; the finally clause makes
    # sure the large temporary file never outlives this step.
    print(colored('Downloading moco v2 checkpoint', 'blue'))
    try:
        subprocess.run(['wget', '-L', moco_url], check=True)
        moco_state = torch.load(moco_file, map_location='cpu')
    finally:
        if os.path.exists(moco_file):
            os.remove(moco_file)

    # Transfer moco weights
    print(colored('Transfer MoCo weights to model', 'blue'))
    new_state_dict = {}
    state_dict = moco_state['state_dict']
    for k in list(state_dict.keys()):
        # Copy backbone weights
        if k.startswith('module.encoder_q') and not k.startswith('module.encoder_q.fc'):
            new_k = 'module.backbone.' + k[len('module.encoder_q.'):]
            new_state_dict[new_k] = state_dict[k]

        # Copy mlp weights
        elif k.startswith('module.encoder_q.fc'):
            new_k = 'module.contrastive_head.' + k[len('module.encoder_q.fc.'):]
            new_state_dict[new_k] = state_dict[k]

        else:
            raise ValueError('Unexpected key {}'.format(k))

    model.load_state_dict(new_state_dict)

    # Save final model
    print(colored('Save pretext model', 'blue'))
    torch.save(model.module.state_dict(), p['pretext_model'])
    # In this case, we mine the neighbors before the MLP.
    model.module.contrastive_head = torch.nn.Identity()

    # Mine the topk nearest neighbors (Train)
    # These will be used for training with the SCAN-Loss.
    # The dataloader built above is reused; rebuilding the dataset here would
    # have no effect on the mining.
    topk = 50
    print(colored('Mine the nearest neighbors (Train)(Top-%d)' % (topk), 'blue'))
    fill_memory_bank(train_dataloader, model, memory_bank_train)
    indices, acc = memory_bank_train.mine_nearest_neighbors(topk)
    print('Accuracy of top-%d nearest neighbors on train set is %.2f' %
          (topk, 100 * acc))
    np.save(p['topk_neighbors_train_path'], indices)

    # Mine the topk nearest neighbors (Validation)
    # These will be used for validation.
    topk = 5
    print(colored('Mine the nearest neighbors (Val)(Top-%d)' % (topk), 'blue'))
    fill_memory_bank(val_dataloader, model, memory_bank_val)
    print('Mine the neighbors')
    indices, acc = memory_bank_val.mine_nearest_neighbors(topk)
    print('Accuracy of top-%d nearest neighbors on val set is %.2f' %
          (topk, 100 * acc))
    np.save(p['topk_neighbors_val_path'], indices)
def main():
    """Train the similarity-prediction ("simpred") model and log its embeddings.

    Parses CLI flags via ``FLAGS``, builds similarity datasets/dataloaders,
    the model (``DataParallel`` on CUDA), optimizer and loss, optionally
    resumes from ``p['simpred_checkpoint']``, then trains for
    ``p['epochs']`` epochs. After each epoch the validation accuracy from
    ``simpred_evaluate`` decides whether the unwrapped model weights are
    written to ``p['simpred_model']``; a resumable checkpoint is written
    every epoch. Finally the best weights are reloaded and the validation
    features are written to TensorBoard with ``add_embedding``.

    The TensorBoard writer is closed on every exit path so buffered events
    are flushed even when training is interrupted by an exception.
    """
    args = FLAGS.parse_args()
    p = create_config(args.config_env, args.config_exp, args.tb_run)
    print(colored(p, 'red'))

    # CUDNN
    torch.backends.cudnn.benchmark = True

    # Data
    print(colored('Get dataset and dataloaders', 'blue'))
    train_transformations = get_train_transformations(p)
    val_transformations = get_val_transformations(p)
    train_dataset = get_train_dataset(p, train_transformations, split='train',
                                      to_similarity_dataset=True)
    val_dataset = get_val_dataset(p, val_transformations,
                                  to_similarity_dataset=True)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Train transforms:', train_transformations)
    print('Validation transforms:', val_transformations)
    print('Train samples %d - Val samples %d' %
          (len(train_dataset), len(val_dataset)))

    # Tensorboard writer
    writer = SummaryWriter(log_dir=p['simpred_tb_dir'])
    try:
        # Model
        print(colored('Get model', 'blue'))
        model = get_model(p, p['pretext_model'])
        print(model)
        model = torch.nn.DataParallel(model)
        model = model.cuda()

        # Optimizer
        print(colored('Get optimizer', 'blue'))
        optimizer = get_optimizer(p, model, p['update_cluster_head_only'])
        print(optimizer)

        # Warning
        if p['update_cluster_head_only']:
            print(colored('WARNING: will only update the cluster head', 'red'))

        # Loss function
        print(colored('Get loss', 'blue'))
        criterion = get_criterion(p)
        criterion.cuda()
        print(criterion)

        # Checkpoint (stores the DataParallel-wrapped state dict)
        if os.path.exists(p['simpred_checkpoint']):
            print(colored('Restart from checkpoint {}'.format(p['simpred_checkpoint']), 'blue'))
            checkpoint = torch.load(p['simpred_checkpoint'], map_location='cpu')
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
        else:
            print(colored('No checkpoint file at {}'.format(p['simpred_checkpoint']), 'blue'))
            start_epoch = 0
            best_acc = 0

        # Main loop
        print(colored('Starting main loop', 'blue'))
        for epoch in range(start_epoch, p['epochs']):
            print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
            print(colored('-' * 15, 'yellow'))

            # Adjust lr
            lr = adjust_learning_rate(p, optimizer, epoch)
            print('Adjusted learning rate to {:.5f}'.format(lr))

            # Train
            print('Train ...')
            simpred_train(train_dataloader, model, criterion, optimizer, epoch,
                          writer, p['update_cluster_head_only'])

            # Evaluate
            print('Make prediction on validation set ...')
            predictions = get_predictions(p, val_dataloader, model)

            print('Evaluate based on simpred loss ...')
            simpred_stats = simpred_evaluate(predictions, writer, epoch)
            print(simpred_stats)
            accuracy = simpred_stats['accuracy']

            if accuracy > best_acc:
                print('New highest accuracy on validation set: %.4f -> %.4f' %
                      (best_acc, accuracy))
                best_acc = accuracy
                # Best model is stored unwrapped (without the DataParallel prefix).
                torch.save({'model': model.module.state_dict()}, p['simpred_model'])
            else:
                print('No new highest accuracy on validation set: %.4f -> %.4f' %
                      (best_acc, accuracy))

            # Checkpoint
            print('Checkpoint ...')
            torch.save({'optimizer': optimizer.state_dict(),
                        'model': model.state_dict(),
                        'epoch': epoch + 1,
                        'best_acc': best_acc},
                       p['simpred_checkpoint'])

        # Evaluate and save the final model
        print(colored('Evaluate best model based on simpred metric at the end', 'blue'))
        model_checkpoint = torch.load(p['simpred_model'], map_location='cpu')
        model.module.load_state_dict(model_checkpoint['model'])
        predictions, features, thumbnails = get_predictions(
            p, val_dataloader, model, return_features=True,
            return_thumbnails=True)
        writer.add_embedding(features, predictions[0]['targets'], thumbnails,
                             p['epochs'], p['simpred_tb_dir'])
    finally:
        # Flush and release the event file regardless of how training ended.
        writer.close()
def main():
    """Run the self-labeling stage: setup, resumable training loop, final evaluation.

    Reads the module-level ``args`` for the config paths. Each epoch writes a
    resumable checkpoint to ``p['selflabel_checkpoint']`` plus a per-epoch
    copy of the unwrapped weights under ``p['selflabel_dir']``. After the
    loop the model is evaluated once more (with a confusion matrix) and the
    unwrapped weights are saved to ``p['selflabel_model']``.
    """
    cfg = create_config(args.config_env, args.config_exp)
    print(colored(cfg, 'red'))

    # Network: print the bare module, then wrap in DataParallel and move to CUDA.
    print(colored('Retrieve model', 'blue'))
    net = get_model(cfg, cfg['scan_model'])
    print(net)
    net = torch.nn.DataParallel(net).cuda()

    # Loss
    print(colored('Get loss', 'blue'))
    loss_fn = get_criterion(cfg)
    loss_fn.cuda()
    print(loss_fn)

    # cuDNN autotuner
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Optimizer
    print(colored('Retrieve optimizer', 'blue'))
    opt = get_optimizer(cfg, net)
    print(opt)

    # Data: the train set gets both transform pipelines, the val set only
    # the standard one.
    print(colored('Retrieve dataset', 'blue'))
    augment_tf = get_train_transformations(cfg)
    standard_tf = get_val_transformations(cfg)
    train_set = get_train_dataset(
        cfg, {'standard': standard_tf, 'augment': augment_tf},
        split='train', to_augmented_dataset=True)
    train_loader = get_train_dataloader(cfg, train_set)
    val_set = get_val_dataset(cfg, standard_tf)
    val_loader = get_val_dataloader(cfg, val_set)
    sample_counts = (len(train_set), len(val_set))
    print(colored('Train samples %d - Val samples %d' % sample_counts, 'yellow'))

    # Resume if a checkpoint is present, otherwise start from epoch 0.
    ckpt_path = cfg['selflabel_checkpoint']
    if not os.path.exists(ckpt_path):
        print(colored('No checkpoint file at {}'.format(ckpt_path), 'blue'))
        first_epoch = 0
    else:
        print(colored('Restart from checkpoint {}'.format(ckpt_path), 'blue'))
        state = torch.load(ckpt_path, map_location='cpu')
        net.load_state_dict(state['model'])
        opt.load_state_dict(state['optimizer'])
        first_epoch = state['epoch']

    # Optional EMA helper, handed to the training routine.
    ema = EMA(net, alpha=cfg['ema_alpha']) if cfg['use_ema'] else None

    print(colored('Starting main loop', 'blue'))
    for epoch in range(first_epoch, cfg['epochs']):
        banner = 'Epoch %d/%d' % (epoch + 1, cfg['epochs'])
        print(colored(banner, 'yellow'))
        print(colored('-' * 10, 'yellow'))

        # Learning-rate schedule
        current_lr = adjust_learning_rate(cfg, opt, epoch)
        print('Adjusted learning rate to {:.5f}'.format(current_lr))

        # Self-labeling step
        print('Train ...')
        selflabel_train(train_loader, net, loss_fn, opt, epoch, ema=ema)

        # Progress monitoring only; not used for model selection.
        print('Evaluate ...')
        preds = get_predictions(cfg, val_loader, net)
        stats = hungarian_evaluate(0, preds, compute_confusion_matrix=False)
        print(stats)

        # Resumable checkpoint (wrapped state dict) plus a per-epoch snapshot
        # of the unwrapped weights.
        print('Checkpoint ...')
        resume_state = {
            'optimizer': opt.state_dict(),
            'model': net.state_dict(),
            'epoch': epoch + 1,
        }
        torch.save(resume_state, cfg['selflabel_checkpoint'])
        snapshot_name = 'model_%d.pth.tar' % (epoch)
        torch.save(net.module.state_dict(),
                   os.path.join(cfg['selflabel_dir'], snapshot_name))

    # Final evaluation with a confusion matrix, then persist the weights.
    print(colored('Evaluate model at the end', 'blue'))
    preds = get_predictions(cfg, val_loader, net)
    matrix_path = os.path.join(cfg['selflabel_dir'], 'confusion_matrix.png')
    stats = hungarian_evaluate(0, preds,
                               class_names=val_set.classes,
                               compute_confusion_matrix=True,
                               confusion_matrix_file=matrix_path)
    print(stats)
    torch.save(net.module.state_dict(), cfg['selflabel_model'])
def _mine_split_neighbors(dataloader, model, memory_bank, topk, split):
    """Fill ``memory_bank`` from ``dataloader`` and mine its top-k neighbors.

    Prints the progress and accuracy lines for ``split`` ('train' or 'val')
    and returns the mined neighbor indices.
    """
    print(colored('Fill memory bank for mining the nearest neighbors (%s) ...' % split, 'blue'))
    fill_memory_bank(dataloader, model, memory_bank)
    print('Mine the nearest neighbors (Top-%d)' % (topk))
    indices, acc = memory_bank.mine_nearest_neighbors(topk)
    print('Accuracy of top-%d nearest neighbors on %s set is %.2f' % (topk, split, 100 * acc))
    return indices


def main():
    """Train the SimCLR pretext model and mine nearest neighbors for SCAN.

    Builds the model, augmented training data, a non-augmented "base" copy
    of the train split for kNN evaluation, and one memory bank for the base
    split and one for the val split. Training resumes from
    ``p['pretext_checkpoint']`` when that file exists. Every epoch runs a
    kNN evaluation and writes a checkpoint; at epochs 50 and 75 (0-based)
    neighbor accuracy is additionally reported without saving anything.
    After training, the model is saved to ``p['pretext_model']`` and the
    top-20 (train) / top-5 (val) neighbor indices are saved to
    ``p['topk_neighbors_train_path']`` / ``p['topk_neighbors_val_path']``.
    """
    # Retrieve config file
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # Model
    print(colored('Retrieve model', 'blue'))
    model = get_model(p)
    print('Model is {}'.format(model.__class__.__name__))
    print('Model parameters: {:.2f}M'.format(
        sum(param.numel() for param in model.parameters()) / 1e6))
    print(model)
    model = model.cuda()

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Dataset
    print(colored('Retrieve dataset', 'blue'))
    train_transforms = get_train_transformations(p)
    print('Train transforms:', train_transforms)
    val_transforms = get_val_transformations(p)
    print('Validation transforms:', val_transforms)
    train_dataset = get_train_dataset(p, train_transforms, to_augmented_dataset=True,
                                      split='train+unlabeled')  # Split is for stl-10
    val_dataset = get_val_dataset(p, val_transforms)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Dataset contains {}/{} train/val samples'.format(len(train_dataset), len(val_dataset)))

    # Memory Bank
    print(colored('Build MemoryBank', 'blue'))
    base_dataset = get_train_dataset(p, val_transforms, split='train')  # Dataset w/o augs for knn eval
    base_dataloader = get_val_dataloader(p, base_dataset)
    memory_bank_base = MemoryBank(len(base_dataset),
                                  p['model_kwargs']['features_dim'],
                                  p['num_classes'], p['criterion_kwargs']['temperature'])
    memory_bank_base.cuda()
    memory_bank_val = MemoryBank(len(val_dataset),
                                 p['model_kwargs']['features_dim'],
                                 p['num_classes'], p['criterion_kwargs']['temperature'])
    memory_bank_val.cuda()

    # Criterion
    print(colored('Retrieve criterion', 'blue'))
    criterion = get_criterion(p)
    print('Criterion is {}'.format(criterion.__class__.__name__))
    criterion = criterion.cuda()

    # Optimizer and scheduler
    print(colored('Retrieve optimizer', 'blue'))
    optimizer = get_optimizer(p, model)
    print(optimizer)

    # Checkpoint
    if os.path.exists(p['pretext_checkpoint']):
        print(colored('Restart from checkpoint {}'.format(p['pretext_checkpoint']), 'blue'))
        checkpoint = torch.load(p['pretext_checkpoint'], map_location='cpu')
        optimizer.load_state_dict(checkpoint['optimizer'])
        model.load_state_dict(checkpoint['model'])
        start_epoch = checkpoint['epoch']
    else:
        print(colored('No checkpoint file at {}'.format(p['pretext_checkpoint']), 'blue'))
        start_epoch = 0
    # Both branches previously moved the model to CUDA; do it once here.
    model = model.cuda()

    # Training
    print(colored('Starting main loop', 'blue'))
    for epoch in range(start_epoch, p['epochs']):
        # 1-based epoch in the banner, matching the other training scripts.
        print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
        print(colored('-' * 15, 'yellow'))

        # Adjust lr
        lr = adjust_learning_rate(p, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Train
        print('Train ...')
        simclr_train(train_dataloader, model, criterion, optimizer, epoch)

        # Fill memory bank
        print('Fill memory bank for kNN...')
        fill_memory_bank(base_dataloader, model, memory_bank_base)

        # Evaluate (To monitor progress - Not for validation)
        print('Evaluate ...')
        top1 = contrastive_evaluate(val_dataloader, model, memory_bank_base)
        print('Result of kNN evaluation is %.2f' % (top1))

        # Checkpoint
        print('Checkpoint ...')
        torch.save({'optimizer': optimizer.state_dict(), 'model': model.state_dict(),
                    'epoch': epoch + 1}, p['pretext_checkpoint'])

        # Intermediate report only: neighbor accuracy is printed, the indices
        # are intentionally not written to disk at these epochs.
        if epoch in [50, 75]:
            _mine_split_neighbors(base_dataloader, model, memory_bank_base, 20, 'train')
            _mine_split_neighbors(val_dataloader, model, memory_bank_val, 5, 'val')

    # Save final model
    torch.save(model.state_dict(), p['pretext_model'])

    # Mine the topk nearest neighbors at the very end (Train)
    # These will be served as input to the SCAN loss.
    indices = _mine_split_neighbors(base_dataloader, model, memory_bank_base, 20, 'train')
    np.save(p['topk_neighbors_train_path'], indices)

    # Mine the topk nearest neighbors at the very end (Val)
    # These will be used for validation.
    indices = _mine_split_neighbors(val_dataloader, model, memory_bank_val, 5, 'val')
    np.save(p['topk_neighbors_val_path'], indices)
def main():
    """Train the model, track the best validation result, and evaluate the best model.

    Standard output is redirected to a ``Logger`` writing
    ``log_file.txt`` under ``p['output_dir']``. Training resumes from
    ``p['checkpoint']`` when present; otherwise the untrained model is
    evaluated once to initialise ``best_result``. Each epoch trains with
    ``train_vanilla`` and (unless ``eval_final_10_epochs_only`` restricts it
    to the last ten epochs) evaluates; improved models are written to
    ``p['best_model']`` and a resumable checkpoint to ``p['checkpoint']``.
    At the end the best weights are reloaded, evaluated once more, and a
    notification is sent with ``send_email``.
    """
    # Retrieve config file
    cv2.setNumThreads(0)
    p = create_config(args.config_env, args.config_exp, args.save_name)
    sys.stdout = Logger(os.path.join(p['output_dir'], 'log_file.txt'))
    print(colored(p, 'red'))

    # Get model
    print(colored('Retrieve model', 'blue'))
    model = get_model(p)
    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # Get criterion
    print(colored('Get loss', 'blue'))
    criterion = get_criterion(p)
    criterion.cuda()
    print(criterion)

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Optimizer
    print(colored('Retrieve optimizer', 'blue'))
    optimizer = get_optimizer(p, model)
    print(optimizer)

    # Dataset
    print(colored('Retrieve dataset', 'blue'))

    # Transforms
    train_transforms, val_transforms = get_transformations(p)
    train_dataset = get_train_dataset(p, train_transforms)
    val_dataset = get_val_dataset(p, val_transforms)
    # True validation dataset without reshape.
    # NOTE(review): not referenced again in this function - confirm it is still needed.
    true_val_dataset = get_val_dataset(p, None)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Train samples %d - Val samples %d' %
          (len(train_dataset), len(val_dataset)))
    print('Train transformations:')
    print(train_transforms)
    print('Val transformations:')
    print(val_transforms)

    # Resume from checkpoint
    if os.path.exists(p['checkpoint']):
        print(colored('Restart from checkpoint {}'.format(p['checkpoint']), 'blue'))
        checkpoint = torch.load(p['checkpoint'], map_location='cpu')
        optimizer.load_state_dict(checkpoint['optimizer'])
        model.load_state_dict(checkpoint['model'])
        start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']

    else:
        print(colored('No checkpoint file at {}'.format(p['checkpoint']), 'blue'))
        start_epoch = 0
        # Baseline: evaluate the untrained model so later epochs have
        # something to be compared against.
        save_model_predictions(p, val_dataloader, model)
        best_result = eval_all_results(p)

    # Main loop
    print(colored('Starting main loop', 'blue'))

    for epoch in range(start_epoch, p['epochs']):
        print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
        print(colored('-' * 10, 'yellow'))

        # Adjust lr
        lr = adjust_learning_rate(p, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Train
        print('Train ...')
        train_vanilla(p, train_dataloader, model, criterion, optimizer, epoch)

        # Evaluate
        # To speed up -> optionally avoid eval every epoch, and only test
        # during the final 10 epochs.
        final_only = 'eval_final_10_epochs_only' in p and p['eval_final_10_epochs_only']
        eval_bool = (not final_only) or (epoch + 1 > p['epochs'] - 10)

        # Perform evaluation
        if eval_bool:
            print('Evaluate ...')
            save_model_predictions(p, val_dataloader, model)
            curr_result = eval_all_results(p)
            improves, best_result = validate_results(p, curr_result, best_result)
            if improves:
                print('Save new best model')
                torch.save(model.state_dict(), p['best_model'])

        # Checkpoint
        print('Checkpoint ...')
        torch.save({'optimizer': optimizer.state_dict(),
                    'model': model.state_dict(),
                    'epoch': epoch + 1,
                    'best_result': best_result},
                   p['checkpoint'])

    # Evaluate best model at the end
    print(colored('Evaluating best model at the end', 'blue'))
    if os.path.exists(p['best_model']):
        # The best weights live in p['best_model']; p['checkpoint'] only
        # holds the weights of the most recent epoch.
        model.load_state_dict(torch.load(p['best_model'], map_location='cpu'))
    else:
        # No epoch ever improved on the baseline, so no best-model file was
        # written; fall back to the latest checkpoint.
        model.load_state_dict(
            torch.load(p['checkpoint'], map_location='cpu')['model'])
    print("Model state dict keys: ", model.state_dict().keys())
    save_model_predictions(p, val_dataloader, model)
    eval_all_results(p)
    send_email(target_mail_address_list,
               server_name=server_name,
               exception_message="Success!",
               successfully=True)
def main():
    """Mine and save the top-5 nearest neighbors of the validation split.

    Builds the model, datasets, dataloaders and two memory banks, loads
    weights from ``p['pretext_checkpoint']`` when that file exists, then
    fills the validation memory bank and stores the mined indices at
    ``p['topk_neighbors_val_path']``. Uses the module-level ``args`` and
    ``device``.

    NOTE: an experimental feature-bank / kNN loop and the train-split
    neighbor mining were previously present here as disabled code; only the
    validation mining is active.
    """
    cfg = create_config(args.config_env, args.config_exp)
    print(colored(cfg, 'red'))

    # Network
    print(colored('Retrieve model', 'green'))
    net = get_model(cfg)
    print('Model is {}'.format(net.__class__.__name__))
    n_params = sum(w.numel() for w in net.parameters())
    print('Model parameters: {:.2f}M'.format(n_params / 1e6))
    print(net)
    net = net.to(device)

    # cuDNN autotuner
    print(colored('Set CuDNN benchmark', 'green'))
    torch.backends.cudnn.benchmark = True

    # Data
    print(colored('Retrieve dataset', 'green'))
    train_tf = get_train_transformations(cfg)
    print('Train transforms:', train_tf)
    val_tf = get_val_transformations(cfg)
    print('Validation transforms:', val_tf)
    train_set = get_train_dataset(cfg, train_tf, to_augmented_dataset=True,
                                  split='train')  # Split is for stl-10
    val_set = get_val_dataset(cfg, val_tf)
    # Both loaders are built with the validation dataloader factory.
    train_loader = get_val_dataloader(cfg, train_set)
    val_loader = get_val_dataloader(cfg, val_set)
    print('Dataset contains {}/{} train/val samples'.format(
        len(train_set), len(val_set)))

    # Memory banks: one over the non-augmented train split, one over val.
    print(colored('Build MemoryBank', 'green'))
    plain_train_set = get_train_dataset(cfg, val_tf, split='train')
    plain_train_loader = get_val_dataloader(cfg, plain_train_set)
    feat_dim = cfg['model_kwargs']['features_dim']
    bank_train = MemoryBank(len(plain_train_set), feat_dim,
                            cfg['num_classes'],
                            cfg['criterion_kwargs']['temperature'])
    bank_train.to(device)
    bank_val = MemoryBank(len(val_set), cfg['model_kwargs']['features_dim'],
                          cfg['num_classes'],
                          cfg['criterion_kwargs']['temperature'])
    bank_val.to(device)

    # Weights: only the model state is restored; optimizer and epoch are
    # not read from the checkpoint.
    ckpt_path = cfg['pretext_checkpoint']
    if not os.path.exists(ckpt_path):
        print(colored('No checkpoint file at {}'.format(ckpt_path), 'green'))
        start_epoch = 0  # not referenced again in this function
    else:
        print(colored('Restart from checkpoint {}'.format(ckpt_path), 'green'))
        state = torch.load(ckpt_path, map_location='cpu')
        net.load_state_dict(state['model'])
    net = net.to(device)

    # Validation neighbors
    print(colored('Fill memory bank for mining the nearest neighbors (val) ...',
                  'green'))
    fill_memory_bank(val_loader, net, bank_val)
    k = 5
    print('Mine the nearest neighbors (Top-%d)' % (k))
    neighbor_idx, neighbor_acc = bank_val.mine_nearest_neighbors(k)
    print('Accuracy of top-%d nearest neighbors on val set is %.2f' %
          (k, 100 * neighbor_acc))
    np.save(cfg['topk_neighbors_val_path'], neighbor_idx)
def main():
    """Load a local MoCo v2 checkpoint into the model and save it as the pretext model.

    Builds the model (not wrapped in ``DataParallel``), datasets,
    dataloaders and memory banks, reads the MoCo checkpoint from
    ``main_dir + model_dir``, remaps its ``module.encoder_q.*`` keys to
    ``backbone.*`` / ``contrastive_head.*``, loads them, saves the model to
    ``p['pretext_model']`` and finally replaces the contrastive head with
    ``Identity``. Uses the module-level ``args``, ``device``, ``main_dir``
    and ``model_dir``.

    Raises:
        ValueError: if the checkpoint contains a key that is not under
            ``module.encoder_q.``.
    """
    # Retrieve config file
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # Model
    print(colored('Retrieve model', 'green'))
    model = get_model(p)
    print('Model is {}'.format(model.__class__.__name__))
    print(model)
    model = model.to(device)

    # CUDNN
    print(colored('Set CuDNN benchmark', 'green'))
    torch.backends.cudnn.benchmark = True

    # Dataset (both splits use the validation transforms and val dataloader)
    print(colored('Retrieve dataset', 'green'))
    transforms = get_val_transformations(p)
    train_dataset = get_train_dataset(p, transforms)
    val_dataset = get_val_dataset(p, transforms)
    train_dataloader = get_val_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Dataset contains {}/{} train/val samples'.format(len(train_dataset), len(val_dataset)))

    # Memory Bank
    print(colored('Build MemoryBank', 'green'))
    memory_bank_train = MemoryBank(len(train_dataset), 2048, p['num_classes'], p['temperature'])
    memory_bank_train.to(device)
    memory_bank_val = MemoryBank(len(val_dataset), 2048, p['num_classes'], p['temperature'])
    memory_bank_val.to(device)

    # Load the official MoCoV2 checkpoint from local storage (no download).
    print(colored('Downloading moco v2 checkpoint', 'green'))
    moco_state = torch.load(main_dir + model_dir + 'moco_v2_800ep_pretrain.pth.tar',
                            map_location=device)

    # Transfer moco weights
    print(colored('Transfer MoCo weights to model', 'green'))
    encoder_prefix = 'module.encoder_q.'
    head_prefix = 'module.encoder_q.fc.'
    new_state_dict = {}
    state_dict = moco_state['state_dict']
    for k, v in state_dict.items():
        # Route by prefix rather than by substring: the head prefix is the
        # more specific one, so it has to be tested first.
        if k.startswith(head_prefix):
            # Copy mlp weights
            new_state_dict['contrastive_head.' + k[len(head_prefix):]] = v
        elif k.startswith(encoder_prefix):
            # Copy backbone weights
            new_state_dict['backbone.' + k[len(encoder_prefix):]] = v
        else:
            # Fail with the offending key instead of an opaque IndexError.
            raise ValueError('Unexpected key {}'.format(k))

    model.load_state_dict(new_state_dict)

    # Save final model
    print(colored('Save pretext model', 'green'))
    torch.save(model.state_dict(), p['pretext_model'])
    # In this case, we mine the neighbors before the MLP.
    model.contrastive_head = Identity()
def main():
    """Run SCAN training, predicting on the training data in place of a val split.

    Parses CLI flags via ``FLAGS``, builds a neighbors dataset from the train
    split and feeds that same dataset to both the train and the "val"
    dataloader. Resumes from ``p['scan_checkpoint']`` when it exists. Each
    epoch adjusts the learning rate, trains with ``scan_train`` and computes
    predictions; no evaluation or checkpoint writing happens in this body.
    """
    cli = FLAGS.parse_args()
    cfg = create_config(cli.config_env, cli.config_exp)
    print(colored(cfg, 'red'))

    # cuDNN autotuner
    torch.backends.cudnn.benchmark = True

    # Data: the training set doubles as the "validation" input so that
    # cluster predictions are produced for the training samples.
    print(colored('Get dataset and dataloaders', 'blue'))
    train_tf = get_train_transformations(cfg)
    train_set = get_train_dataset(cfg, train_tf, split='train',
                                  to_neighbors_dataset=True)
    train_loader = get_train_dataloader(cfg, train_set)
    predict_loader = get_val_dataloader(cfg, train_set)
    print('Train transforms:', train_tf)

    # Network: print the bare module, then wrap in DataParallel on CUDA.
    print(colored('Get model', 'blue'))
    net = get_model(cfg, cfg['pretext_model'])
    print(net)
    net = torch.nn.DataParallel(net).cuda()

    # Optimizer
    print(colored('Get optimizer', 'blue'))
    head_only = cfg['update_cluster_head_only']
    opt = get_optimizer(cfg, net, head_only)
    print(opt)

    if cfg['update_cluster_head_only']:
        print(colored('WARNING: SCAN will only update the cluster head', 'red'))

    # Loss
    print(colored('Get loss', 'blue'))
    loss_fn = get_criterion(cfg)
    loss_fn.cuda()
    print(loss_fn)

    # Resume state. best_loss / best_loss_head are restored but not
    # consulted again in this body.
    ckpt_path = cfg['scan_checkpoint']
    if not os.path.exists(ckpt_path):
        print(colored('No checkpoint file at {}'.format(ckpt_path), 'blue'))
        first_epoch = 0
        best_loss = 1e4
        best_loss_head = None
    else:
        print(colored('Restart from checkpoint {}'.format(ckpt_path), 'blue'))
        state = torch.load(ckpt_path, map_location='cpu')
        net.load_state_dict(state['model'])
        opt.load_state_dict(state['optimizer'])
        first_epoch = state['epoch']
        best_loss = state['best_loss']
        best_loss_head = state['best_loss_head']

    print(colored('Starting main loop', 'blue'))
    for epoch in range(first_epoch, cfg['epochs']):
        banner = 'Epoch %d/%d' % (epoch + 1, cfg['epochs'])
        print(colored(banner, 'yellow'))
        print(colored('-' * 15, 'yellow'))

        # Learning-rate schedule
        current_lr = adjust_learning_rate(cfg, opt, epoch)
        print('Adjusted learning rate to {:.5f}'.format(current_lr))

        # Train
        print('Train ...')
        scan_train(train_loader, net, loss_fn, opt, epoch,
                   cfg['update_cluster_head_only'])

        # Predictions over the training data (evaluation is skipped for now).
        print('Make prediction on validation set ...')
        predictions = get_predictions(cfg, predict_loader, net)
def main():
    """Train the clustering model (optionally guided by a simpred model) and evaluate it.

    Parses CLI flags via ``FLAGS`` and builds neighbor datasets whose
    ``use_negatives`` / ``use_simpred`` switches follow
    ``p['use_simpred_model']``. When that flag is set, a second model is
    built via ``get_model(..., load_simpred=True)`` and all its parameters
    have ``requires_grad`` disabled. Training resumes from
    ``p['scan_checkpoint']`` when present. After each epoch the head with
    the highest accuracy reported by ``umcl_evaluate`` decides whether the
    unwrapped weights are written to ``p['scan_model']``; a resumable
    checkpoint is written every epoch. Finally the best weights are
    reloaded, embeddings are logged to TensorBoard, and a confusion matrix
    is written under ``p['scan_dir']``.

    The TensorBoard writer is closed on every exit path so buffered events
    are flushed even when training is interrupted by an exception.
    """
    args = FLAGS.parse_args()
    p = create_config(args.config_env, args.config_exp, args.tb_run)
    print(colored(p, 'red'))

    # CUDNN
    torch.backends.cudnn.benchmark = True

    # Data
    print(colored('Get dataset and dataloaders', 'blue'))
    train_transformations = get_train_transformations(p)
    val_transformations = get_val_transformations(p)
    train_dataset = get_train_dataset(p, train_transformations,
                                      use_negatives=not p['use_simpred_model'],
                                      use_simpred=p['use_simpred_model'],
                                      split='train', to_neighbors_dataset=True)
    val_dataset = get_val_dataset(p, val_transformations,
                                  use_negatives=not p['use_simpred_model'],
                                  use_simpred=p['use_simpred_model'],
                                  to_neighbors_dataset=True)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Train transforms:', train_transformations)
    print('Validation transforms:', val_transformations)
    print('Train samples %d - Val samples %d' %
          (len(train_dataset), len(val_dataset)))

    # Tensorboard writer
    writer = SummaryWriter(log_dir=p['scan_tb_dir'])
    try:
        # Model
        print(colored('Get model', 'blue'))
        model = get_model(p, p['pretext_model'])
        print(model)
        model = torch.nn.DataParallel(model)
        model = model.cuda()

        # Simpred Model (frozen: gradients are disabled for all parameters)
        if p['use_simpred_model']:
            print(colored('Get simpred model', 'blue'))
            simpred_model = get_model(p, p['simpred_model'], load_simpred=True)
            print(simpred_model)
            simpred_model = torch.nn.DataParallel(simpred_model)
            simpred_model = simpred_model.cuda()
            for param in simpred_model.parameters():
                param.requires_grad = False
        else:
            print('Not using simpred model')
            simpred_model = None

        # Optimizer
        print(colored('Get optimizer', 'blue'))
        optimizer = get_optimizer(p, model, p['update_cluster_head_only'])
        print(optimizer)

        # Warning
        if p['update_cluster_head_only']:
            print(colored('WARNING: SCAN will only update the cluster head', 'red'))

        # Loss function
        print(colored('Get loss', 'blue'))
        criterion = get_criterion(p)
        criterion.cuda()
        print(criterion)

        # Checkpoint (stores the DataParallel-wrapped state dict)
        if os.path.exists(p['scan_checkpoint']):
            print(colored('Restart from checkpoint {}'.format(p['scan_checkpoint']), 'blue'))
            checkpoint = torch.load(p['scan_checkpoint'], map_location='cpu')
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            best_acc_head = checkpoint['best_acc_head']
        else:
            print(colored('No checkpoint file at {}'.format(p['scan_checkpoint']), 'blue'))
            start_epoch = 0
            best_acc = 0
            best_acc_head = None

        # Main loop
        print(colored('Starting main loop', 'blue'))
        for epoch in range(start_epoch, p['epochs']):
            print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
            print(colored('-' * 15, 'yellow'))

            # Adjust lr
            lr = adjust_learning_rate(p, optimizer, epoch)
            print('Adjusted learning rate to {:.5f}'.format(lr))

            # Train
            print('Train ...')
            umcl_train(train_dataloader, model, simpred_model, criterion,
                       optimizer, epoch, writer, p['update_cluster_head_only'])

            # Evaluate
            print('Make prediction on validation set ...')
            predictions = get_predictions(p, val_dataloader, model)

            print('Evaluate based on similarity accuracy')
            stats = umcl_evaluate(p, val_dataloader, model, simpred_model)
            print(stats)
            highest_acc_head = stats['highest_acc_head']
            highest_acc = stats['highest_acc']

            if highest_acc > best_acc:
                print('New highest accuracy on validation set: %.4f -> %.4f' %
                      (best_acc, highest_acc))
                print('Highest accuracy head is %d' % highest_acc_head)
                best_acc = highest_acc
                best_acc_head = highest_acc_head
                # Best model is stored unwrapped, together with its head index.
                torch.save({'model': model.module.state_dict(),
                            'head': best_acc_head},
                           p['scan_model'])
            else:
                print('No new highest accuracy on validation set: %.4f -> %.4f' %
                      (best_acc, highest_acc))
                print('Highest accuracy head is %d' % highest_acc_head)

            print('Evaluate with hungarian matching algorithm ...')
            clustering_stats = hungarian_evaluate(highest_acc_head, predictions,
                                                  compute_confusion_matrix=False,
                                                  tf_writer=writer, epoch=epoch)
            print(clustering_stats)

            # Checkpoint
            print('Checkpoint ...')
            torch.save({'optimizer': optimizer.state_dict(),
                        'model': model.state_dict(),
                        'epoch': epoch + 1,
                        'best_acc': best_acc,
                        'best_acc_head': best_acc_head},
                       p['scan_checkpoint'])

        # Evaluate and save the final model
        print(colored('Evaluate best model based on similarity accuracy at the end', 'blue'))
        model_checkpoint = torch.load(p['scan_model'], map_location='cpu')
        model.module.load_state_dict(model_checkpoint['model'])
        predictions, features, thumbnails = get_predictions(
            p, val_dataloader, model, return_features=True,
            return_thumbnails=True)
        writer.add_embedding(features, predictions[0]['targets'], thumbnails,
                             p['epochs'], p['scan_tb_dir'])
        clustering_stats = hungarian_evaluate(
            model_checkpoint['head'], predictions,
            class_names=val_dataset.classes,
            compute_confusion_matrix=True,
            confusion_matrix_file=os.path.join(p['scan_dir'], 'confusion_matrix.png'))
        print(clustering_stats)
    finally:
        # Flush and release the event file regardless of how training ended.
        writer.close()
def main_worker(gpu, ngpus_per_node, args):
    """Run contrastive training as one process of a distributed job.

    Args:
        gpu: Index of the GPU used by this process. It is stored on
            ``args.gpu`` and in the config under ``'gpu'``.
        ngpus_per_node: Number of processes per node. It is used to derive
            the global rank and to split the batch size and the number of
            data-loading workers.
        args: Parsed command-line namespace. Reads ``config_env``,
            ``config_exp``, ``dist_url``, ``dist_backend``, ``world_size``,
            ``rank`` and ``nvidia_apex``; ``gpu`` and ``rank`` are
            overwritten.

    Side effects:
        On processes with ``gpu != 0`` the builtin ``print`` is replaced by a
        no-op; on ``gpu == 0`` ``sys.stdout`` is redirected to a ``Logger``.
        The process group is initialised, and the process for which
        ``args.rank % ngpus_per_node == 0`` writes ``p['checkpoint']`` after
        every epoch.
    """
    # Retrieve config file
    p = create_config(args.config_env, args.config_exp)

    # Check gpu id
    args.gpu = gpu
    p['gpu'] = gpu
    if args.gpu != 0:
        # Silence every process except the first one. Keyword arguments must
        # be accepted too, otherwise print(..., file=...) or print(..., end=...)
        # raises TypeError on the silenced processes.
        def print_pass(*args, **kwargs):
            pass

        builtins.print = print_pass
    else:
        sys.stdout = Logger(os.path.join(p['output_dir'], 'log_file.txt'))

    if args.dist_url == "env://" and args.rank == -1:
        args.rank = int(os.environ["RANK"])

    # For multiprocessing distributed training, rank needs to be the
    # global rank among all the processes
    args.rank = args.rank * ngpus_per_node + gpu
    dist.init_process_group(backend=args.dist_backend,
                            init_method=args.dist_url,
                            world_size=args.world_size,
                            rank=args.rank)

    print('Python script is {}'.format(os.path.abspath(__file__)))
    print(colored(p, 'red'))

    # Get model
    print(colored('Retrieve model', 'blue'))
    model = ContrastiveModel(p)
    torch.cuda.set_device(args.gpu)
    model.cuda(args.gpu)

    # Optimizer
    print(colored('Retrieve optimizer', 'blue'))
    optimizer = get_optimizer(p, model.parameters())
    print(optimizer)

    # Nvidia-apex
    if args.nvidia_apex:
        print(colored('Using mixed precision training', 'blue'))
        from apex import amp
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O2",
                                          keep_batchnorm_fp32=True,
                                          loss_scale="dynamic")
    else:
        amp = None

    # When using a single GPU per process and per
    # DistributedDataParallel, we need to divide the batch size
    # ourselves based on the total number of GPUs we have
    p['train_batch_size'] = int(p['train_batch_size'] / ngpus_per_node)
    p['num_workers'] = int(
        (p['num_workers'] + ngpus_per_node - 1) / ngpus_per_node)
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[args.gpu], find_unused_parameters=True)

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Dataset
    print(colored('Retrieve dataset', 'blue'))

    # Transforms
    train_transform = get_train_transformations()
    print(train_transform)
    train_dataset = DatasetKeyQuery(
        get_train_dataset(p, transform=None),
        train_transform,
        downsample_sal=not p['model_kwargs']['upsample'])
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset)
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=p['train_batch_size'],
        shuffle=(train_sampler is None),
        num_workers=p['num_workers'],
        pin_memory=True,
        sampler=train_sampler,
        drop_last=True,
        collate_fn=collate_custom)
    print(colored('Train samples %d' % (len(train_dataset)), 'yellow'))
    print(colored(train_dataset, 'yellow'))

    # Resume from checkpoint
    if os.path.exists(p['checkpoint']):
        print(
            colored('Restart from checkpoint {}'.format(p['checkpoint']),
                    'blue'))
        loc = 'cuda:{}'.format(args.gpu)
        checkpoint = torch.load(p['checkpoint'], map_location=loc)
        optimizer.load_state_dict(checkpoint['optimizer'])
        model.load_state_dict(checkpoint['model'])
        if args.nvidia_apex:
            amp.load_state_dict(checkpoint['amp'])
        start_epoch = checkpoint['epoch']
    else:
        print(
            colored('No checkpoint file at {}'.format(p['checkpoint']),
                    'blue'))
        start_epoch = 0
        model = model.cuda()

    # Main loop
    print(colored('Starting main loop', 'blue'))
    for epoch in range(start_epoch, p['epochs']):
        print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
        print(colored('-' * 10, 'yellow'))

        # The distributed sampler derives its shuffle from the epoch number;
        # without this call every epoch iterates the data in the same order.
        train_sampler.set_epoch(epoch)

        # Adjust lr
        lr = adjust_learning_rate(p, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Train
        print('Train ...')
        train(p, train_dataloader, model, optimizer, epoch, amp)

        # Checkpoint (written by a single process per node)
        if args.rank % ngpus_per_node == 0:
            print('Checkpoint ...')
            state = {
                'optimizer': optimizer.state_dict(),
                'model': model.state_dict(),
                'epoch': epoch + 1,
            }
            if args.nvidia_apex:
                # The amp state is restored on resume when apex is enabled.
                state['amp'] = amp.state_dict()
            torch.save(state, p['checkpoint'])
def main():
    """Train and evaluate a linear probe on top of a pretext model.

    Reads the command line through the module-level ``FLAGS`` parser and the
    experiment config through ``create_config``. When ``args.mode`` is
    ``'train'`` the probe is trained (resuming from
    ``p['linearprobe_checkpoint']`` if that file exists), evaluated every
    fifth epoch, and the weights with the lowest validation loss are written
    to ``p['linearprobe_model']``. Afterwards the saved best weights are
    loaded and evaluated on the validation set.
    """
    args = FLAGS.parse_args()
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # CUDNN
    torch.backends.cudnn.benchmark = True

    # Data
    print(colored('Get dataset and dataloaders', 'blue'))
    train_transformations = get_train_transformations(p)
    val_transformations = get_val_transformations(p)
    print('Train transforms:', train_transformations)
    print('Validation transforms:', val_transformations)
    train_dataset = get_train_dataset(p, train_transformations, split='train')
    val_dataset = get_val_dataset(p, val_transformations)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Train samples %d - Val samples %d' %
          (len(train_dataset), len(val_dataset)))

    # Model
    print(colored('Get model', 'blue'))
    model = get_model(p)
    print(model)

    # Optimizer
    print(colored('Get optimizer', 'blue'))
    optimizer = get_optimizer(p, model, p['update_cluster_head_only'])
    print(optimizer)

    # Warning
    if p['update_cluster_head_only']:
        print(
            colored(
                'WARNING: Linear probing will only update the cluster head',
                'red'))

    # Loss function
    print(colored('Get loss', 'blue'))
    criterion = get_criterion(p)
    criterion.cuda()
    print(criterion)

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # NOTE(review): the weights are loaded into the DataParallel wrapper with
    # strict=False, so keys that do not match are skipped silently. Confirm
    # that the keys stored in p['pretext_model'] carry the 'module.' prefix;
    # the printed report below lists what was not matched.
    state = torch.load(p['pretext_model'], map_location='cpu')
    missing = model.load_state_dict(state, strict=False)
    print('missing components', missing)

    if args.mode == 'train':
        # Checkpoint
        if os.path.exists(p['linearprobe_checkpoint']):
            print(
                colored(
                    'Restart from checkpoint {}'.format(
                        p['linearprobe_checkpoint']), 'blue'))
            checkpoint = torch.load(p['linearprobe_checkpoint'],
                                    map_location='cpu')
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
        else:
            print(
                colored(
                    'No checkpoint file at {}'.format(
                        p['linearprobe_checkpoint']), 'blue'))
            start_epoch = 0
            best_loss = 1e4

        # Main loop
        print(colored('Starting main loop', 'blue'))
        for epoch in range(start_epoch, p['epochs']):
            print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
            print(colored('-' * 15, 'yellow'))

            # Adjust lr
            lr = adjust_learning_rate(p, optimizer, epoch)
            print('Adjusted learning rate to {:.5f}'.format(lr))

            # Train
            print('Train ...')
            linearprobe_train(train_dataloader, model, criterion, optimizer,
                              epoch)

            if (epoch + 1) % 5 == 0:
                print('Evaluate based on CE loss ...')
                linearprobe_stats = linearprobe_evaluate(
                    val_dataloader, model, criterion)
                loss = linearprobe_stats['loss']
                if loss < best_loss:
                    best_loss = loss
                    torch.save({'model': model.module.state_dict()},
                               p['linearprobe_model'])

                # Checkpoint. This stays inside the evaluation branch because
                # it prints the stats computed just above.
                print('Checkpoint ...')
                print(linearprobe_stats)
                torch.save(
                    {
                        'optimizer': optimizer.state_dict(),
                        'model': model.state_dict(),
                        'epoch': epoch + 1,
                        # Store the best loss so far, not the latest one.
                        # Otherwise a resumed run compares against the wrong
                        # threshold and may overwrite the best model with a
                        # worse one.
                        'best_loss': best_loss
                    }, p['linearprobe_checkpoint'])

    # Evaluate and save the final model
    # NOTE(review): assumed to run for every value of args.mode, so that a
    # non-train mode only evaluates the stored best model -- confirm.
    print(colored('Evaluate best model', 'blue'))
    model_checkpoint = torch.load(p['linearprobe_model'], map_location='cpu')
    model.module.load_state_dict(model_checkpoint['model'])
    linearprobe_stats = linearprobe_evaluate(val_dataloader, model, criterion)
    print(linearprobe_stats)
    print('Final Accuracy:', linearprobe_stats['accuracy'])
def main():
    """Fine-tune only the final 1x1 convolution of a segmentation model.

    All parameters except ``decoder.4.weight`` and ``decoder.4.bias`` are
    frozen. Training resumes from ``p['checkpoint']`` when that file exists.
    After each epoch the model is evaluated online; the weights with the
    highest mIoU are stored in ``p['best_model']`` and a resumable checkpoint
    is written. At the end the best weights are reloaded, predictions are
    saved to disk and evaluated offline on the un-transformed validation set.
    """
    cv2.setNumThreads(1)

    # Retrieve config file
    p = create_config(args.config_env, args.config_exp)
    sys.stdout = Logger(p['log_file'])
    script_path = os.path.abspath(__file__)
    print('Python script is {}'.format(script_path))
    print(colored(p, 'red'))

    # Get model
    print(colored('Retrieve model', 'blue'))
    model = get_model(p)
    print(model)
    model = model.cuda()

    # Freeze all layers except final 1 by 1 convolutional layer
    trainable_names = ('decoder.4.weight', 'decoder.4.bias')
    for param_name, weight in model.named_parameters():
        if param_name in trainable_names:
            continue
        weight.requires_grad = False

    # Get criterion
    print(colored('Get loss', 'blue'))
    criterion = torch.nn.CrossEntropyLoss(ignore_index=255)
    criterion.cuda()
    print(criterion)

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Optimizer (only sees the parameters that are still trainable)
    print(colored('Retrieve optimizer', 'blue'))
    trainable = [weight for weight in model.parameters()
                 if weight.requires_grad]
    assert len(trainable) == 2  # decoder.4.weight, decoder.4.bias
    optimizer = get_optimizer(p, trainable)
    print(optimizer)

    # Dataset
    print(colored('Retrieve dataset', 'blue'))
    train_transforms = get_train_transformations()
    val_transforms = get_val_transformations()
    train_dataset = get_train_dataset(p, train_transforms)
    val_dataset = get_val_dataset(p, val_transforms)
    # True validation dataset without reshape - For validation.
    true_val_dataset = get_val_dataset(p, None)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    sample_counts = (len(train_dataset), len(val_dataset))
    print(colored('Train samples %d - Val samples %d' % sample_counts,
                  'yellow'))

    # Resume from checkpoint
    checkpoint_path = p['checkpoint']
    if not os.path.exists(checkpoint_path):
        print(colored('No checkpoint file at {}'.format(checkpoint_path),
                      'blue'))
        start_epoch, best_epoch, best_iou = 0, 0, 0
        model = model.cuda()
    else:
        print(colored('Restart from checkpoint {}'.format(checkpoint_path),
                      'blue'))
        checkpoint = torch.load(checkpoint_path, map_location='cpu')
        optimizer.load_state_dict(checkpoint['optimizer'])
        model.load_state_dict(checkpoint['model'])
        model.cuda()
        start_epoch = checkpoint['epoch']
        best_epoch = checkpoint['best_epoch']
        best_iou = checkpoint['best_iou']

    # Main loop
    print(colored('Starting main loop', 'blue'))
    total_epochs = p['epochs']
    for epoch in range(start_epoch, total_epochs):
        print(colored('Epoch %d/%d' % (epoch + 1, total_epochs), 'yellow'))
        print(colored('-' * 10, 'yellow'))

        # Adjust lr
        lr = adjust_learning_rate(p, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Train
        print('Train ...')
        train_stats = train_segmentation_vanilla(
            p,
            train_dataloader,
            model,
            criterion,
            optimizer,
            epoch,
            freeze_batchnorm=p['freeze_batchnorm'])

        # Evaluate online -> This will use batched eval where every image is
        # resized to the same resolution.
        print('Evaluate ...')
        val_stats = eval_segmentation_supervised_online(
            p, val_dataloader, model)
        miou = val_stats['mIoU']
        if not miou > best_iou:
            print('No new best model: %.2f -> %.2f (mIoU)' %
                  (100 * best_iou, 100 * miou))
            print('Last best model was found in epoch %d' % (best_epoch))
        else:
            print('Found new best model: %.2f -> %.2f (mIoU)' %
                  (100 * best_iou, 100 * miou))
            best_iou = miou
            best_epoch = epoch
            torch.save(model.state_dict(), p['best_model'])

        # Checkpoint
        print('Checkpoint ...')
        resume_state = {
            'optimizer': optimizer.state_dict(),
            'model': model.state_dict(),
            'epoch': epoch + 1,
            'best_epoch': best_epoch,
            'best_iou': best_iou
        }
        torch.save(resume_state, p['checkpoint'])

    # Evaluate best model at the end -> This will evaluate the predictions on
    # the original resolution.
    print(colored('Evaluating best model at the end', 'blue'))
    best_weights = torch.load(p['best_model'])
    model.load_state_dict(best_weights)
    save_results_to_disk(p,
                         val_dataloader,
                         model,
                         crf_postprocess=args.crf_postprocess)
    eval_stats = eval_segmentation_supervised_offline(p,
                                                      true_val_dataset,
                                                      verbose=True)
) / query_sample['sal'].numel() if key_area < self.max_area and key_area > self.min_area and query_area < self.max_area and query_area > self.min_area: # Ok. Foreground/Background has proper ratio. return {'key': key_sample, 'query': query_sample} else: count += 1 # Try again. Areas of foreground/background to small. if __name__ == '__main__': import numpy as np from matplotlib import pyplot as plt from utils.common_config import get_train_dataset, get_train_transformations p = {'train_db_name': 'VOCSegmentation', 'overfit': False} transform = get_train_transformations('strong') base_dataset = get_train_dataset(p, transform=None) dataset = DatasetKeyQuery(base_dataset, transform, downsample_sal=False) for i, sample in enumerate(dataset): fig, axes = plt.subplots(4) key = np.transpose(sample['key']['image'].numpy(), (1, 2, 0)) key = 255 * (key * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])) query = np.transpose(sample['query']['image'].numpy(), (1, 2, 0)) query = 255 * (query * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])) sal_query = sample['query']['sal'] sal_key = sample['key']['sal'] axes[0].imshow(key.astype(np.uint8)) axes[1].imshow(query.astype(np.uint8)) axes[2].imshow(sal_key)
def main():
    """Export a pretext checkpoint and mine nearest neighbours from it.

    Loads the weights stored at ``p['pretext_checkpoint']`` (which must
    exist), saves the model state dict to ``p['pretext_model']``, then fills
    one memory bank with the un-augmented train split and one with the
    validation split. The top-20 (train) and top-5 (val) neighbour indices
    are saved to ``p['topk_neighbors_train_path']`` and
    ``p['topk_neighbors_val_path']``.
    """
    # Retrieve config file
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # Model
    print(colored('Retrieve model', 'blue'))
    model = get_model(p)
    print('Model is {}'.format(model.__class__.__name__))
    num_params = sum(weight.numel() for weight in model.parameters())
    print('Model parameters: {:.2f}M'.format(num_params / 1e6))
    print(model)
    model = model.cuda()

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Dataset
    val_transforms = get_val_transformations(p)
    print('Validation transforms:', val_transforms)
    val_dataset = get_val_dataset(p, val_transforms)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Dataset contains {} val samples'.format(len(val_dataset)))

    # Memory Bank
    print(colored('Build MemoryBank', 'blue'))
    # Dataset w/o augs for knn eval
    base_dataset = get_train_dataset(p, val_transforms, split='train')
    base_dataloader = get_val_dataloader(p, base_dataset)

    memory_bank_base = MemoryBank(len(base_dataset),
                                  p['model_kwargs']['features_dim'],
                                  p['num_classes'],
                                  p['criterion_kwargs']['temperature'])
    memory_bank_base.cuda()

    memory_bank_val = MemoryBank(len(val_dataset),
                                 p['model_kwargs']['features_dim'],
                                 p['num_classes'],
                                 p['criterion_kwargs']['temperature'])
    memory_bank_val.cuda()

    # Checkpoint
    checkpoint_path = p['pretext_checkpoint']
    assert os.path.exists(checkpoint_path)
    print(colored('Restart from checkpoint {}'.format(checkpoint_path),
                  'blue'))
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint)
    model.cuda()

    # Save model
    torch.save(model.state_dict(), p['pretext_model'])

    # Mine the topk nearest neighbors at the very end.
    # The train neighbors will be served as input to the SCAN loss, the val
    # neighbors will be used for validation. Output paths are looked up by
    # key inside the loop so each one is read only when its pass is saved.
    mining_passes = (
        ('Fill memory bank for mining the nearest neighbors (train) ...',
         base_dataloader, memory_bank_base, 20,
         'Accuracy of top-%d nearest neighbors on train set is %.2f',
         'topk_neighbors_train_path'),
        ('Fill memory bank for mining the nearest neighbors (val) ...',
         val_dataloader, memory_bank_val, 5,
         'Accuracy of top-%d nearest neighbors on val set is %.2f',
         'topk_neighbors_val_path'),
    )
    for banner, dataloader, memory_bank, topk, report, path_key in \
            mining_passes:
        print(colored(banner, 'blue'))
        fill_memory_bank(dataloader, model, memory_bank)
        print('Mine the nearest neighbors (Top-%d)' % (topk))
        indices, acc = memory_bank.mine_nearest_neighbors(topk)
        print(report % (topk, 100 * acc))
        np.save(p[path_key], indices)
def main():
    """Train and evaluate the clustering model with the scanf objective.

    Reads the command line through the module-level ``FLAGS`` parser and the
    experiment config through ``create_config``. When ``args.mode`` is
    ``'train'`` the model is trained (resuming from ``p['scanf_checkpoint']``
    if that file exists); after every epoch the head with the lowest
    validation loss is determined and, if it improves on the best loss so
    far, the model is written to ``p['scanf_model']``. Afterwards the saved
    best model is loaded, its targets and cluster predictions are dumped to
    ``scanf_gt_targets.pth.tar`` and ``scanf_cluster_predictions.pth.tar`` in
    the working directory, and Hungarian-matching statistics are printed.
    """
    args = FLAGS.parse_args()
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # CUDNN
    torch.backends.cudnn.benchmark = True

    # Data
    print(colored('Get dataset and dataloaders', 'blue'))
    train_transformations = get_train_transformations(p)
    val_transformations = get_val_transformations(p)
    train_dataset = get_train_dataset(p, train_transformations,
                                      split='train',
                                      to_neighbors_strangers_dataset=True)
    val_dataset = get_val_dataset(p, val_transformations,
                                  to_neighbors_strangers_dataset=True)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Train transforms:', train_transformations)
    print('Validation transforms:', val_transformations)
    print('Train samples %d - Val samples %d' %
          (len(train_dataset), len(val_dataset)))

    # Model
    print(colored('Get model', 'blue'))
    model = get_model(p, p['pretext_model'])
    print(model)
    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # Optimizer
    print(colored('Get optimizer', 'blue'))
    optimizer = get_optimizer(p, model, p['update_cluster_head_only'])
    print(optimizer)

    # Warning
    if p['update_cluster_head_only']:
        print(colored('WARNING: SCAN will only update the cluster head',
                      'red'))

    # Loss function
    print(colored('Get loss', 'blue'))
    criterion = get_criterion(p)
    criterion.cuda()
    print(criterion)

    if args.mode == 'train':
        # Checkpoint
        if os.path.exists(p['scanf_checkpoint']):
            print(colored('Restart from checkpoint {}'.format(
                p['scanf_checkpoint']), 'blue'))
            checkpoint = torch.load(p['scanf_checkpoint'],
                                    map_location='cpu')
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            best_loss_head = checkpoint['best_loss_head']
        else:
            print(colored('No checkpoint file at {}'.format(
                p['scanf_checkpoint']), 'blue'))
            start_epoch = 0
            best_loss = 1e4
            best_loss_head = None

        # Main loop
        print(colored('Starting main loop', 'blue'))
        for epoch in range(start_epoch, p['epochs']):
            print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']),
                          'yellow'))
            print(colored('-' * 15, 'yellow'))

            # Adjust lr
            lr = adjust_learning_rate(p, optimizer, epoch)
            print('Adjusted learning rate to {:.5f}'.format(lr))

            # Train
            print('Train ...')
            scanf_train(train_dataloader, model, criterion, optimizer, epoch,
                        p['update_cluster_head_only'])

            # Evaluate
            print('Make prediction on validation set ...')
            predictions = get_predictions(p, val_dataloader, model)

            print('Evaluate based on SCAN loss ...')
            scanf_stats = scanf_evaluate(predictions)
            print(scanf_stats)
            lowest_loss_head = scanf_stats['lowest_loss_head']
            lowest_loss = scanf_stats['lowest_loss']

            if lowest_loss < best_loss:
                print('New lowest loss on validation set: %.4f -> %.4f' %
                      (best_loss, lowest_loss))
                print('Lowest loss head is %d' % (lowest_loss_head))
                best_loss = lowest_loss
                best_loss_head = lowest_loss_head
                torch.save({'model': model.module.state_dict(),
                            'head': best_loss_head}, p['scanf_model'])
            else:
                print('No new lowest loss on validation set: %.4f -> %.4f' %
                      (best_loss, lowest_loss))
                if best_loss_head is None:
                    # No epoch has beaten the initial threshold yet (for
                    # example when the loss is NaN), so there is no best head
                    # to report; '%d' % None would raise TypeError.
                    print('Lowest loss head is not available yet')
                else:
                    print('Lowest loss head is %d' % (best_loss_head))

            print('Evaluate with hungarian matching algorithm ...')
            clustering_stats = hungarian_evaluate(
                lowest_loss_head, predictions, compute_confusion_matrix=False)
            print(clustering_stats)

            # Checkpoint
            print('Checkpoint ...')
            torch.save({'optimizer': optimizer.state_dict(),
                        'model': model.state_dict(),
                        'epoch': epoch + 1,
                        'best_loss': best_loss,
                        'best_loss_head': best_loss_head},
                       p['scanf_checkpoint'])

    # Evaluate and save the final model
    # NOTE(review): assumed to run for every value of args.mode, so that a
    # non-train mode only evaluates the stored best model -- confirm.
    print(colored('Evaluate best model based on SCAN metric at the end',
                  'blue'))
    model_checkpoint = torch.load(p['scanf_model'], map_location='cpu')
    model.module.load_state_dict(model_checkpoint['model'])
    predictions = get_predictions(p, val_dataloader, model)

    # Dump targets and predictions of the selected head for later inspection.
    gt_targets = predictions[model_checkpoint['head']]['targets']
    cluster_predictions = predictions[model_checkpoint['head']]['predictions']
    print(gt_targets.shape)
    print(cluster_predictions.shape)
    torch.save(gt_targets, 'scanf_gt_targets.pth.tar')
    torch.save(cluster_predictions, 'scanf_cluster_predictions.pth.tar')

    clustering_stats = hungarian_evaluate(
        model_checkpoint['head'], predictions,
        class_names=val_dataset.dataset.classes,
        compute_confusion_matrix=True,
        confusion_matrix_file=os.path.join(p['scanf_dir'],
                                           'confusion_matrix.png'))
    print(clustering_stats)
    print('Final Accuracy:', clustering_stats['ACC'])
def main(args):
    """Train a linear attribute classifier on top of a frozen pretext model.

    Loads the weights stored under ``'model'`` in ``p['pretext_checkpoint']``
    (which must exist), freezes every existing parameter, and appends a new
    ``nn.Linear`` layer mapping ``p['model_kwargs']['features_dim']`` to
    ``p['num_attribute_classes']`` after the contrastive head. After every
    epoch the validation accuracy is printed and a checkpoint is written to
    ``p['pretext_fine_tune_checkpoint']``; the final state dict is saved to
    ``p['pretext_fine_tune_model']``.

    Args:
        args: Parsed command-line namespace; reads ``config_env`` and
            ``config_exp``.
    """
    # Retrieve config file
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # Model
    print(colored('Retrieve model', 'blue'))
    model = get_model(p)
    print('Model is {}'.format(model.__class__.__name__))
    print('Model parameters: {:.2f}M'.format(
        sum(p.numel() for p in model.parameters()) / 1e6))
    print(model)
    model = model.cuda()

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Dataset
    print(colored('Retrieve dataset', 'blue'))
    train_transforms = get_train_transformations(p)
    print('Train transforms:', train_transforms)
    val_transforms = get_val_transformations(p)
    print('Validation transforms:', val_transforms)
    train_dataset = get_train_dataset(
        p, train_transforms, to_augmented_dataset=True,
        split='train+unlabeled')  # Split is for stl-10
    val_dataset = get_val_dataset(p, val_transforms)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Dataset contains {}/{} train/val samples'.format(
        len(train_dataset), len(val_dataset)))

    # Criterion
    print(colored('Retrieve criterion', 'blue'))
    criterion = get_criterion(p)
    print('Criterion is {}'.format(criterion.__class__.__name__))
    criterion = criterion.cuda()

    # Checkpoint
    assert os.path.exists(
        p['pretext_checkpoint']), "Checkpoint not found - can't fine-tune."
    print(colored('Restart from checkpoint {}'.format(
        p['pretext_checkpoint']), 'blue'))
    checkpoint = torch.load(p['pretext_checkpoint'], map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    model.cuda()
    # Only the model weights are restored; the optimizer state and the epoch
    # counter of the pretext run are deliberately not reused.
    start_epoch = 0

    # Train linear model from representations to evaluate attributes
    # classification
    print(colored('Train linear', 'blue'))
    for parameter in model.parameters():
        parameter.requires_grad = False
    # The new linear layer is created after the freeze, so its parameters
    # keep the default requires_grad=True.
    model.contrastive_head = nn.Sequential(
        model.contrastive_head,
        nn.Linear(p['model_kwargs']['features_dim'],
                  p['num_attribute_classes']))
    model.cuda()

    # Optimizer and scheduler
    print(colored('Retrieve optimizer', 'blue'))
    optimizer = get_optimizer(p, model)
    print(optimizer)

    # Training
    print(colored('Starting main loop', 'blue'))
    for epoch in range(start_epoch, p['epochs']):
        # Show a 1-based epoch number so the last epoch reads N/N.
        print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
        print(colored('-' * 15, 'yellow'))

        # Adjust lr
        lr = adjust_learning_rate(p, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Train
        print('Train ...')
        simclr_fine_tune_train(train_dataloader, model, criterion, optimizer,
                               epoch)

        # Evaluate
        acc = attributes_evaluate(val_dataloader, model)
        print('Val set accuracy %.2f' % acc)

        # Checkpoint
        print('Checkpoint ...')
        torch.save({'optimizer': optimizer.state_dict(),
                    'model': model.state_dict(),
                    'epoch': epoch + 1},
                   p['pretext_fine_tune_checkpoint'])

    # Save final model
    torch.save(model.state_dict(), p['pretext_fine_tune_model'])