def main():
    """Train the similarity-prediction (simpred) model and log its embeddings.

    Steps, in order:

    1. Parse the command-line flags and build the config dict ``p``.
    2. Build train/val datasets (both requested with
       ``to_similarity_dataset=True``) and their dataloaders.
    3. Build the model from ``p['pretext_model']``, wrap it in
       ``torch.nn.DataParallel`` and move it to the GPU; build the optimizer
       and the criterion.
    4. Resume from ``p['simpred_checkpoint']`` when that file exists.
    5. For every remaining epoch: adjust the learning rate, train, evaluate
       on the validation loader, save the weights to ``p['simpred_model']``
       when the accuracy improves, and always write a resumable checkpoint.
    6. Reload the best weights and write the validation embeddings to
       TensorBoard.

    The TensorBoard writer is closed in a ``finally`` clause so buffered
    events are flushed even when training is interrupted by an exception.
    """
    args = FLAGS.parse_args()
    p = create_config(args.config_env, args.config_exp, args.tb_run)
    print(colored(p, 'red'))

    # CUDNN
    torch.backends.cudnn.benchmark = True

    # Data
    print(colored('Get dataset and dataloaders', 'blue'))
    train_transformations = get_train_transformations(p)
    val_transformations = get_val_transformations(p)
    train_dataset = get_train_dataset(p, train_transformations,
                                      split='train',
                                      to_similarity_dataset=True)
    val_dataset = get_val_dataset(p, val_transformations,
                                  to_similarity_dataset=True)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Train transforms:', train_transformations)
    print('Validation transforms:', val_transformations)
    print('Train samples %d - Val samples %d' %
          (len(train_dataset), len(val_dataset)))

    # Tensorboard writer
    writer = SummaryWriter(log_dir=p['simpred_tb_dir'])
    try:
        # Model
        print(colored('Get model', 'blue'))
        model = get_model(p, p['pretext_model'])
        print(model)
        model = torch.nn.DataParallel(model)
        model = model.cuda()

        # Optimizer
        print(colored('Get optimizer', 'blue'))
        optimizer = get_optimizer(p, model, p['update_cluster_head_only'])
        print(optimizer)

        # Warning
        if p['update_cluster_head_only']:
            print(colored('WARNING: will only update the cluster head', 'red'))

        # Loss function
        print(colored('Get loss', 'blue'))
        criterion = get_criterion(p)
        criterion.cuda()
        print(criterion)

        # Checkpoint: holds the DataParallel-wrapped state dict, the optimizer
        # state, the next epoch index and the best accuracy so far.
        if os.path.exists(p['simpred_checkpoint']):
            print(colored('Restart from checkpoint {}'.format(p['simpred_checkpoint']), 'blue'))
            checkpoint = torch.load(p['simpred_checkpoint'], map_location='cpu')
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
        else:
            print(colored('No checkpoint file at {}'.format(p['simpred_checkpoint']), 'blue'))
            start_epoch = 0
            best_acc = 0

        # Main loop
        print(colored('Starting main loop', 'blue'))
        for epoch in range(start_epoch, p['epochs']):
            print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
            print(colored('-' * 15, 'yellow'))

            # Adjust lr
            lr = adjust_learning_rate(p, optimizer, epoch)
            print('Adjusted learning rate to {:.5f}'.format(lr))

            # Train
            print('Train ...')
            simpred_train(train_dataloader, model, criterion, optimizer,
                          epoch, writer, p['update_cluster_head_only'])

            # Evaluate
            print('Make prediction on validation set ...')
            predictions = get_predictions(p, val_dataloader, model)

            print('Evaluate based on simpred loss ...')
            simpred_stats = simpred_evaluate(predictions, writer, epoch)
            print(simpred_stats)
            accuracy = simpred_stats['accuracy']

            if accuracy > best_acc:
                print('New highest accuracy on validation set: %.4f -> %.4f' % (best_acc, accuracy))
                best_acc = accuracy
                # The best-model file stores the unwrapped module's weights.
                torch.save({'model': model.module.state_dict()},
                           p['simpred_model'])
            else:
                print('No new highest accuracy on validation set: %.4f -> %.4f' % (best_acc, accuracy))

            # Checkpoint
            print('Checkpoint ...')
            torch.save({'optimizer': optimizer.state_dict(),
                        'model': model.state_dict(),
                        'epoch': epoch + 1,
                        'best_acc': best_acc},
                       p['simpred_checkpoint'])

        # Evaluate and save the final model
        print(colored('Evaluate best model based on simpred metric at the end', 'blue'))
        model_checkpoint = torch.load(p['simpred_model'], map_location='cpu')
        model.module.load_state_dict(model_checkpoint['model'])
        predictions, features, thumbnails = get_predictions(
            p, val_dataloader, model,
            return_features=True, return_thumbnails=True)
        # Positional arguments after the feature matrix are, per the
        # SummaryWriter.add_embedding signature: metadata, label_img,
        # global_step and tag.
        writer.add_embedding(features, predictions[0]['targets'], thumbnails,
                             p['epochs'], p['simpred_tb_dir'])
    finally:
        # Flush pending events (including the embedding) and release the
        # event file, even if training raised.
        writer.close()
def main():
    """Train (optionally) and evaluate the neighbors/strangers SCAN variant.

    Builds the config, the datasets (requested with
    ``to_neighbors_strangers_dataset=True``), the DataParallel model, the
    optimizer and the criterion. When ``args.mode == 'train'`` the training
    loop runs, resuming from ``p['scanf_checkpoint']`` if present and saving
    the lowest-loss weights and head index to ``p['scanf_model']``. Afterwards
    the saved best model is reloaded, its targets and cluster predictions are
    dumped to ``.pth.tar`` files in the working directory, and a Hungarian
    evaluation with a confusion matrix is printed.
    """
    args = FLAGS.parse_args()
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # CUDNN
    torch.backends.cudnn.benchmark = True

    # Data
    print(colored('Get dataset and dataloaders', 'blue'))
    train_tf = get_train_transformations(p)
    val_tf = get_val_transformations(p)
    train_dataset = get_train_dataset(
        p, train_tf, split='train', to_neighbors_strangers_dataset=True)
    val_dataset = get_val_dataset(
        p, val_tf, to_neighbors_strangers_dataset=True)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Train transforms:', train_tf)
    print('Validation transforms:', val_tf)
    sample_counts = (len(train_dataset), len(val_dataset))
    print('Train samples %d - Val samples %d' % sample_counts)

    # Model
    print(colored('Get model', 'blue'))
    model = get_model(p, p['pretext_model'])
    print(model)
    model = torch.nn.DataParallel(model).cuda()

    # Optimizer
    print(colored('Get optimizer', 'blue'))
    head_only = p['update_cluster_head_only']
    optimizer = get_optimizer(p, model, head_only)
    print(optimizer)

    # Warning
    if p['update_cluster_head_only']:
        print(colored('WARNING: SCAN will only update the cluster head', 'red'))

    # Loss function
    print(colored('Get loss', 'blue'))
    criterion = get_criterion(p)
    criterion.cuda()
    print(criterion)

    if args.mode == 'train':
        # Checkpoint
        ckpt_path = p['scanf_checkpoint']
        if not os.path.exists(ckpt_path):
            print(colored('No checkpoint file at {}'.format(ckpt_path), 'blue'))
            start_epoch, best_loss, best_loss_head = 0, 1e4, None
        else:
            print(colored('Restart from checkpoint {}'.format(ckpt_path), 'blue'))
            checkpoint = torch.load(ckpt_path, map_location='cpu')
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            best_loss_head = checkpoint['best_loss_head']

        # Main loop
        print(colored('Starting main loop', 'blue'))
        total_epochs = p['epochs']
        for epoch in range(start_epoch, total_epochs):
            print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
            print(colored('-' * 15, 'yellow'))

            # Adjust lr
            lr = adjust_learning_rate(p, optimizer, epoch)
            print('Adjusted learning rate to {:.5f}'.format(lr))

            # Train
            print('Train ...')
            scanf_train(train_dataloader, model, criterion, optimizer, epoch,
                        p['update_cluster_head_only'])

            # Evaluate
            print('Make prediction on validation set ...')
            predictions = get_predictions(p, val_dataloader, model)

            print('Evaluate based on SCAN loss ...')
            scanf_stats = scanf_evaluate(predictions)
            print(scanf_stats)
            lowest_loss_head = scanf_stats['lowest_loss_head']
            lowest_loss = scanf_stats['lowest_loss']

            improved = lowest_loss < best_loss
            if improved:
                print('New lowest loss on validation set: %.4f -> %.4f' % (best_loss, lowest_loss))
                print('Lowest loss head is %d' % (lowest_loss_head))
                best_loss, best_loss_head = lowest_loss, lowest_loss_head
                best_state = {'model': model.module.state_dict(),
                              'head': best_loss_head}
                torch.save(best_state, p['scanf_model'])
            else:
                print('No new lowest loss on validation set: %.4f -> %.4f' % (best_loss, lowest_loss))
                print('Lowest loss head is %d' % (best_loss_head))

            print('Evaluate with hungarian matching algorithm ...')
            clustering_stats = hungarian_evaluate(
                lowest_loss_head, predictions, compute_confusion_matrix=False)
            print(clustering_stats)

            # Checkpoint
            print('Checkpoint ...')
            resume_state = {
                'optimizer': optimizer.state_dict(),
                'model': model.state_dict(),
                'epoch': epoch + 1,
                'best_loss': best_loss,
                'best_loss_head': best_loss_head,
            }
            torch.save(resume_state, p['scanf_checkpoint'])

    # Evaluate and save the final model
    print(colored('Evaluate best model based on SCAN metric at the end', 'blue'))
    model_checkpoint = torch.load(p['scanf_model'], map_location='cpu')
    model.module.load_state_dict(model_checkpoint['model'])
    predictions = get_predictions(p, val_dataloader, model)

    # Pull out the outputs of the head stored with the best model.
    best_head_output = predictions[model_checkpoint['head']]
    gt_targets = best_head_output['targets']
    cluster_predictions = best_head_output['predictions']
    print(gt_targets.shape)
    print(cluster_predictions.shape)
    torch.save(gt_targets, 'scanf_gt_targets.pth.tar')
    torch.save(cluster_predictions, 'scanf_cluster_predictions.pth.tar')

    matrix_file = os.path.join(p['scanf_dir'], 'confusion_matrix.png')
    clustering_stats = hungarian_evaluate(
        model_checkpoint['head'], predictions,
        class_names=val_dataset.dataset.classes,
        compute_confusion_matrix=True,
        confusion_matrix_file=matrix_file)
    print(clustering_stats)
    print('Final Accuracy:', clustering_stats['ACC'])
def main():
    """Run the self-labeling stage and save the resulting model.

    Reads ``args`` from the enclosing (module) scope. Builds the model from
    ``p['scan_model']``, the criterion, the optimizer and the datasets (the
    train set is requested with ``to_augmented_dataset=True`` and receives
    both the validation and the train transformations). Resumes from
    ``p['selflabel_checkpoint']`` if present, then for each epoch trains,
    prints a Hungarian evaluation on the validation loader for head 0, writes
    a checkpoint and stores a per-epoch copy of the weights in
    ``p['selflabel_dir']``. At the end a confusion matrix is produced and the
    final weights are saved to ``p['selflabel_model']``.
    """
    # Retrieve config file
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # Get model
    print(colored('Retrieve model', 'blue'))
    model = get_model(p, p['scan_model'])
    print(model)
    model = torch.nn.DataParallel(model).cuda()

    # Get criterion
    print(colored('Get loss', 'blue'))
    criterion = get_criterion(p)
    criterion.cuda()
    print(criterion)

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Optimizer
    print(colored('Retrieve optimizer', 'blue'))
    optimizer = get_optimizer(p, model)
    print(optimizer)

    # Dataset
    print(colored('Retrieve dataset', 'blue'))

    # Transforms
    augment_tf = get_train_transformations(p)
    plain_tf = get_val_transformations(p)
    transform_pair = {'standard': plain_tf, 'augment': augment_tf}
    train_dataset = get_train_dataset(p, transform_pair, split='train',
                                      to_augmented_dataset=True)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataset = get_val_dataset(p, plain_tf)
    val_dataloader = get_val_dataloader(p, val_dataset)
    size_msg = 'Train samples %d - Val samples %d' % (len(train_dataset), len(val_dataset))
    print(colored(size_msg, 'yellow'))

    # Checkpoint
    ckpt_path = p['selflabel_checkpoint']
    if not os.path.exists(ckpt_path):
        print(colored('No checkpoint file at {}'.format(ckpt_path), 'blue'))
        start_epoch = 0
    else:
        print(colored('Restart from checkpoint {}'.format(ckpt_path), 'blue'))
        checkpoint = torch.load(ckpt_path, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']

    # EMA
    ema = EMA(model, alpha=p['ema_alpha']) if p['use_ema'] else None

    # Main loop
    print(colored('Starting main loop', 'blue'))
    for epoch in range(start_epoch, p['epochs']):
        print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
        print(colored('-' * 10, 'yellow'))

        # Adjust lr
        lr = adjust_learning_rate(p, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Perform self-labeling
        print('Train ...')
        selflabel_train(train_dataloader, model, criterion, optimizer, epoch,
                        ema=ema)

        # Evaluate (To monitor progress - Not for validation)
        print('Evaluate ...')
        predictions = get_predictions(p, val_dataloader, model)
        clustering_stats = hungarian_evaluate(
            0, predictions, compute_confusion_matrix=False)
        print(clustering_stats)

        # Checkpoint
        print('Checkpoint ...')
        resume_state = {
            'optimizer': optimizer.state_dict(),
            'model': model.state_dict(),
            'epoch': epoch + 1,
        }
        torch.save(resume_state, p['selflabel_checkpoint'])
        # Keep a separate copy of the unwrapped weights for every epoch.
        epoch_file = os.path.join(p['selflabel_dir'],
                                  'model_%d.pth.tar' % (epoch))
        torch.save(model.module.state_dict(), epoch_file)

    # Evaluate and save the final model
    print(colored('Evaluate model at the end', 'blue'))
    predictions = get_predictions(p, val_dataloader, model)
    matrix_file = os.path.join(p['selflabel_dir'], 'confusion_matrix.png')
    clustering_stats = hungarian_evaluate(
        0, predictions,
        class_names=val_dataset.classes,
        compute_confusion_matrix=True,
        confusion_matrix_file=matrix_file)
    print(clustering_stats)
    torch.save(model.module.state_dict(), p['selflabel_model'])
def main():
    """Train the clustering model, optionally guided by a frozen simpred model.

    Steps, in order:

    1. Parse the flags and build the config dict ``p``.
    2. Build neighbor datasets; ``use_simpred`` is set to
       ``p['use_simpred_model']`` and ``use_negatives`` to its negation.
    3. Build the DataParallel model from ``p['pretext_model']``; when
       ``p['use_simpred_model']`` is set, also load the simpred model and
       disable gradients on all of its parameters.
    4. Build the optimizer and criterion and resume from
       ``p['scan_checkpoint']`` when that file exists.
    5. Per epoch: adjust the learning rate, train, evaluate, save the weights
       and head index to ``p['scan_model']`` when the accuracy improves, run
       a Hungarian evaluation, and write a resumable checkpoint.
    6. Reload the best model, log validation embeddings to TensorBoard and
       print a Hungarian evaluation with a confusion matrix.

    The TensorBoard writer is closed in a ``finally`` clause so buffered
    events are flushed even when training is interrupted by an exception.
    """
    args = FLAGS.parse_args()
    p = create_config(args.config_env, args.config_exp, args.tb_run)
    print(colored(p, 'red'))

    # CUDNN
    torch.backends.cudnn.benchmark = True

    # Data
    print(colored('Get dataset and dataloaders', 'blue'))
    train_transformations = get_train_transformations(p)
    val_transformations = get_val_transformations(p)
    train_dataset = get_train_dataset(p, train_transformations,
                                      use_negatives=not p['use_simpred_model'],
                                      use_simpred=p['use_simpred_model'],
                                      split='train',
                                      to_neighbors_dataset=True)
    val_dataset = get_val_dataset(p, val_transformations,
                                  use_negatives=not p['use_simpred_model'],
                                  use_simpred=p['use_simpred_model'],
                                  to_neighbors_dataset=True)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Train transforms:', train_transformations)
    print('Validation transforms:', val_transformations)
    print('Train samples %d - Val samples %d' %
          (len(train_dataset), len(val_dataset)))

    # Tensorboard writer
    writer = SummaryWriter(log_dir=p['scan_tb_dir'])
    try:
        # Model
        print(colored('Get model', 'blue'))
        model = get_model(p, p['pretext_model'])
        print(model)
        model = torch.nn.DataParallel(model)
        model = model.cuda()

        # Simpred Model
        if p['use_simpred_model']:
            print(colored('Get simpred model', 'blue'))
            simpred_model = get_model(p, p['simpred_model'], load_simpred=True)
            print(simpred_model)
            simpred_model = torch.nn.DataParallel(simpred_model)
            simpred_model = simpred_model.cuda()
            # The simpred model is only queried, never optimized.
            for param in simpred_model.parameters():
                param.requires_grad = False
        else:
            print('Not using simpred model')
            simpred_model = None

        # Optimizer
        print(colored('Get optimizer', 'blue'))
        optimizer = get_optimizer(p, model, p['update_cluster_head_only'])
        print(optimizer)

        # Warning
        if p['update_cluster_head_only']:
            print(colored('WARNING: SCAN will only update the cluster head', 'red'))

        # Loss function
        print(colored('Get loss', 'blue'))
        criterion = get_criterion(p)
        criterion.cuda()
        print(criterion)

        # Checkpoint
        if os.path.exists(p['scan_checkpoint']):
            print(colored('Restart from checkpoint {}'.format(p['scan_checkpoint']), 'blue'))
            checkpoint = torch.load(p['scan_checkpoint'], map_location='cpu')
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            best_acc_head = checkpoint['best_acc_head']
        else:
            print(colored('No checkpoint file at {}'.format(p['scan_checkpoint']), 'blue'))
            start_epoch = 0
            best_acc = 0
            best_acc_head = None

        # Main loop
        print(colored('Starting main loop', 'blue'))
        for epoch in range(start_epoch, p['epochs']):
            print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
            print(colored('-' * 15, 'yellow'))

            # Adjust lr
            lr = adjust_learning_rate(p, optimizer, epoch)
            print('Adjusted learning rate to {:.5f}'.format(lr))

            # Train
            print('Train ...')
            umcl_train(train_dataloader, model, simpred_model, criterion,
                       optimizer, epoch, writer,
                       p['update_cluster_head_only'])

            # Evaluate
            print('Make prediction on validation set ...')
            predictions = get_predictions(p, val_dataloader, model)

            print('Evaluate based on similarity accuracy')
            stats = umcl_evaluate(p, val_dataloader, model, simpred_model)
            print(stats)
            highest_acc_head = stats['highest_acc_head']
            highest_acc = stats['highest_acc']

            if highest_acc > best_acc:
                print('New highest accuracy on validation set: %.4f -> %.4f' % (best_acc, highest_acc))
                print('Highest accuracy head is %d' % highest_acc_head)
                best_acc = highest_acc
                best_acc_head = highest_acc_head
                # The best-model file stores the unwrapped weights plus the
                # index of the head that achieved them.
                torch.save({'model': model.module.state_dict(),
                            'head': best_acc_head},
                           p['scan_model'])
            else:
                print('No new highest accuracy on validation set: %.4f -> %.4f' % (best_acc, highest_acc))
                print('Highest accuracy head is %d' % highest_acc_head)

            print('Evaluate with hungarian matching algorithm ...')
            clustering_stats = hungarian_evaluate(highest_acc_head,
                                                  predictions,
                                                  compute_confusion_matrix=False,
                                                  tf_writer=writer,
                                                  epoch=epoch)
            print(clustering_stats)

            # Checkpoint
            print('Checkpoint ...')
            torch.save({'optimizer': optimizer.state_dict(),
                        'model': model.state_dict(),
                        'epoch': epoch + 1,
                        'best_acc': best_acc,
                        'best_acc_head': best_acc_head},
                       p['scan_checkpoint'])

        # Evaluate and save the final model
        print(colored('Evaluate best model based on similarity accuracy at the end', 'blue'))
        model_checkpoint = torch.load(p['scan_model'], map_location='cpu')
        model.module.load_state_dict(model_checkpoint['model'])
        predictions, features, thumbnails = get_predictions(
            p, val_dataloader, model,
            return_features=True, return_thumbnails=True)
        # Positional arguments after the feature matrix are, per the
        # SummaryWriter.add_embedding signature: metadata, label_img,
        # global_step and tag.
        writer.add_embedding(features, predictions[0]['targets'], thumbnails,
                             p['epochs'], p['scan_tb_dir'])
        clustering_stats = hungarian_evaluate(
            model_checkpoint['head'], predictions,
            class_names=val_dataset.classes,
            compute_confusion_matrix=True,
            confusion_matrix_file=os.path.join(p['scan_dir'],
                                               'confusion_matrix.png'))
        print(clustering_stats)
    finally:
        # Flush pending events (including the embedding) and release the
        # event file, even if training raised.
        writer.close()
def main():
    """Evaluate a saved model on the validation set.

    Reads ``args`` from the enclosing (module) scope: ``args.config_exp`` is
    the YAML experiment config, ``args.model`` the weights file and
    ``args.visualize_prototypes`` toggles prototype visualization.

    For the ``simclr`` and ``moco`` setups a memory bank is filled and the
    accuracy of the top-1/5/20 nearest neighbors is printed. For the ``scan``
    and ``selflabel`` setups a Hungarian evaluation with a confusion matrix
    is printed. Any other setup raises ``NotImplementedError``.
    """
    # Read config file
    print(colored('Read config file {} ...'.format(args.config_exp), 'blue'))
    with open(args.config_exp, 'r') as stream:
        cfg = yaml.safe_load(stream)
    cfg['batch_size'] = 512  # To make sure we can evaluate on a single 1080ti
    print(cfg)

    # Get dataset
    print(colored('Get validation dataset ...', 'blue'))
    val_tf = get_val_transformations(cfg)
    dataset = get_val_dataset(cfg, val_tf)
    loader = get_val_dataloader(cfg, dataset)
    print('Number of samples: {}'.format(len(dataset)))

    # Get model
    print(colored('Get model ...', 'blue'))
    model = get_model(cfg)
    print(model)

    # Read model weights
    print(colored('Load model weights ...', 'blue'))
    weights = torch.load(args.model, map_location='cpu')

    if cfg['setup'] in ['simclr', 'moco', 'selflabel']:
        # These files hold the bare state dict.
        model.load_state_dict(weights)
    elif cfg['setup'] == 'scan':
        # This file wraps the state dict together with the selected head.
        model.load_state_dict(weights['model'])
    else:
        raise NotImplementedError

    # CUDA
    model.cuda()

    # Perform evaluation
    if cfg['setup'] in ['simclr', 'moco']:
        print(colored('Perform evaluation of the pretext task (setup={}).'.format(cfg['setup']), 'blue'))
        print('Create Memory Bank')
        n_samples = len(dataset)
        feature_dim = cfg['model_kwargs']['features_dim']
        n_classes = cfg['num_classes']
        if cfg['setup'] == 'simclr':  # Mine neighbors after MLP
            temperature = cfg['criterion_kwargs']['temperature']
        else:  # Mine neighbors before MLP
            temperature = cfg['temperature']
        memory_bank = MemoryBank(n_samples, feature_dim, n_classes,
                                 temperature)
        memory_bank.cuda()

        print('Fill Memory Bank')
        fill_memory_bank(loader, model, memory_bank)

        print('Mine the nearest neighbors')
        for topk in [1, 5, 20]:  # Similar to Fig 2 in paper
            _, acc = memory_bank.mine_nearest_neighbors(topk)
            print('Accuracy of top-{} nearest neighbors on validation set is {:.2f}'.format(topk, 100 * acc))

    elif cfg['setup'] in ['scan', 'selflabel']:
        print(colored('Perform evaluation of the clustering model (setup={}).'.format(cfg['setup']), 'blue'))
        # Only the scan weights file carries a head index; otherwise use 0.
        if cfg['setup'] == 'scan':
            head = weights['head']
        else:
            head = 0
        predictions, features = get_predictions(cfg, loader, model,
                                                return_features=True)
        clustering_stats = hungarian_evaluate(head, predictions,
                                              dataset.classes,
                                              compute_confusion_matrix=True)
        print(clustering_stats)

        if args.visualize_prototypes:
            prototype_indices = get_prototypes(cfg, predictions[head],
                                               features, model)
            visualize_indices(prototype_indices, dataset,
                              clustering_stats['hungarian_match'])

    else:
        raise NotImplementedError
def main(): args = FLAGS.parse_args() p = create_config(args.config_env, args.config_exp) print(colored(p, 'red')) # CUDNN torch.backends.cudnn.benchmark = True # Data print(colored('Get dataset and dataloaders', 'blue')) train_transformations = get_train_transformations(p) #val_transformations = get_val_transformations(p) train_dataset = get_train_dataset(p, train_transformations, split='train', to_neighbors_dataset = True) #val_dataset = get_val_dataset(p, val_transformations, to_neighbors_dataset = True) train_dataloader = get_train_dataloader(p, train_dataset) val_dataloader = get_val_dataloader(p, train_dataset) #!val_ replaced with train_ print('Train transforms:', train_transformations) #print('Validation transforms:', val_transformations) #print('Train samples %d - Val samples %d' %(len(train_dataset), len(val_dataset))) # Model print(colored('Get model', 'blue')) model = get_model(p, p['pretext_model']) print(model) model = torch.nn.DataParallel(model) model = model.cuda() # Optimizer print(colored('Get optimizer', 'blue')) optimizer = get_optimizer(p, model, p['update_cluster_head_only']) print(optimizer) # Warning if p['update_cluster_head_only']: print(colored('WARNING: SCAN will only update the cluster head', 'red')) # Loss function print(colored('Get loss', 'blue')) criterion = get_criterion(p) criterion.cuda() print(criterion) # Checkpoint if os.path.exists(p['scan_checkpoint']): print(colored('Restart from checkpoint {}'.format(p['scan_checkpoint']), 'blue')) checkpoint = torch.load(p['scan_checkpoint'], map_location='cpu') model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) start_epoch = checkpoint['epoch'] best_loss = checkpoint['best_loss'] best_loss_head = checkpoint['best_loss_head'] else: print(colored('No checkpoint file at {}'.format(p['scan_checkpoint']), 'blue')) start_epoch = 0 best_loss = 1e4 best_loss_head = None # Main loop print(colored('Starting main loop', 'blue')) for epoch in 
range(start_epoch, p['epochs']): print(colored('Epoch %d/%d' %(epoch+1, p['epochs']), 'yellow')) print(colored('-'*15, 'yellow')) # Adjust lr lr = adjust_learning_rate(p, optimizer, epoch) print('Adjusted learning rate to {:.5f}'.format(lr)) # Train print('Train ...') scan_train(train_dataloader, model, criterion, optimizer, epoch, p['update_cluster_head_only']) # Evaluate #!!!!!!!!!!!!!!!!!Skipping the next lines because we are not evaluating YET. print('Make prediction on validation set ...') predictions = get_predictions(p, val_dataloader, model) #inputting the train data to get the clusters !!
print('Evaluate with hungarian matching algorithm ...') clustering_stats = hungarian_evaluate(lowest_loss_head, predictions, compute_confusion_matrix=False) print(clustering_stats) # Checkpoint print('Checkpoint ...') torch.save({'optimizer': optimizer.state_dict(), 'model': model.state_dict(), 'epoch': epoch + 1, 'best_loss': best_loss, 'best_loss_head': best_loss_head}, p['scan_checkpoint']) # Evaluate and save the final model print(colored('Evaluate best model based on SCAN metric at the end', 'blue')) model_checkpoint = torch.load(p['scan_model'], map_location='cpu') model.module.load_state_dict(model_checkpoint['model']) predictions = get_predictions(p, val_dataloader, model) <<<<<<< HEAD # clustering_stats = hungarian_evaluate(model_checkpoint['head'], predictions, # class_names=val_dataset.dataset.classes, # compute_confusion_matrix=True, # confusion_matrix_file=os.path.join(p['scan_dir'], 'confusion_matrix.png')) # print(clustering_stats) ======= clustering_stats = hungarian_evaluate(model_checkpoint['head'], predictions, class_names=val_dataset.dataset.classes, compute_confusion_matrix=True, confusion_matrix_file=os.path.join(p['scan_dir'], 'confusion_matrix.png')) print(clustering_stats) >>>>>>> db23360031c529a04f0a144b63e5f3fe49feb44f if __name__ == "__main__":