Python get_train_dataset 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: utils.common_config

메소드/함수: get_train_dataset

hotexamples.com에서의 예제들: 17

Python get_train_dataset - 17개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python utils.common_config.get_train_dataset의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

def main():
    """Build the model, loss, optimizer and augmented training data from the config.

    The config comes from ``create_config`` using the command-line ``args``.
    Each component is announced and printed as it is created. The training
    dataset is requested with two transform pipelines: the validation
    transforms under ``'standard'`` and the training transforms under
    ``'augment'``.
    """
    # Configuration
    cfg = create_config(args.config_env, args.config_exp)
    print(colored(cfg, 'red'))

    # Network: print the bare module first, then wrap it and move it to the GPU
    print(colored('Retrieve model', 'blue'))
    net = get_model(cfg, cfg['scan_model'])
    print(net)
    net = torch.nn.DataParallel(net).cuda()

    # Loss
    print(colored('Get loss', 'blue'))
    loss_fn = get_criterion(cfg)
    loss_fn.cuda()
    print(loss_fn)

    # cuDNN benchmark mode
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Optimizer
    print(colored('Retrieve optimizer', 'blue'))
    optimizer = get_optimizer(cfg, net)
    print(optimizer)

    # Data: both transform pipelines are handed to the dataset as a dict
    print(colored('Retrieve dataset', 'blue'))
    augment_tf = get_train_transformations(cfg)
    standard_tf = get_val_transformations(cfg)
    transform_map = {'standard': standard_tf, 'augment': augment_tf}
    train_dataset = get_train_dataset(cfg, transform_map, split='train',
                                      to_augmented_dataset=True)
    train_dataloader = get_train_dataloader(cfg, train_dataset)

예제 #2

파일 보기

def main():
    """Turn the official MoCo v2 checkpoint into a pretext model and mine neighbors.

    Steps, in order:
      1. Build the model and the train/val datasets (both with the validation
         transforms) plus one memory bank per split.
      2. Download the MoCo v2 (800 epochs) checkpoint, remap its parameter
         names onto this model and load them.
      3. Save the resulting weights to ``p['pretext_model']``.
      4. Replace the contrastive head with an identity and mine the top-50
         (train) and top-5 (val) nearest neighbors, saving both index arrays.

    Raises:
        ValueError: if the checkpoint holds a key outside ``module.encoder_q``.
        urllib.error.URLError: if the checkpoint cannot be downloaded.
    """
    # Local imports: only this entry point downloads anything.
    import shutil
    import urllib.request

    # Retrieve config file
    p = create_config(args)
    print(colored(p, 'red'))

    # Model
    print(colored('Retrieve model', 'blue'))
    model = get_model(p)
    print('Model is {}'.format(model.__class__.__name__))
    print(model)
    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Dataset (the train split deliberately uses the validation transforms
    # and the validation dataloader factory)
    print(colored('Retrieve dataset', 'blue'))
    transforms = get_val_transformations(p)
    train_dataset = get_train_dataset(p, transforms)
    val_dataset = get_val_dataset(p, transforms)
    train_dataloader = get_val_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Dataset contains {}/{} train/val samples'.format(
        len(train_dataset), len(val_dataset)))

    # Memory Bank
    # NOTE(review): 2048 is presumably the backbone feature dimension - confirm.
    print(colored('Build MemoryBank', 'blue'))
    memory_bank_train = MemoryBank(len(train_dataset), 2048, p['num_classes'],
                                   p['temperature'])
    memory_bank_train.cuda()
    memory_bank_val = MemoryBank(len(val_dataset), 2048, p['num_classes'],
                                 p['temperature'])
    memory_bank_val.cuda()

    # Load the official MoCoV2 checkpoint
    print(colored('Downloading moco v2 checkpoint', 'blue'))
    checkpoint_url = ('https://dl.fbaipublicfiles.com/moco/moco_checkpoints/'
                      'moco_v2_800ep/moco_v2_800ep_pretrain.pth.tar')
    checkpoint_file = 'moco_v2_800ep_pretrain.pth.tar'
    try:
        # Download in-process instead of shelling out: a failed transfer
        # raises here rather than surfacing later as a confusing load error.
        with urllib.request.urlopen(checkpoint_url) as response, \
                open(checkpoint_file, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)
        # NOTE(review): torch.load unpickles the file; only acceptable because
        # the URL above is a fixed, trusted source.
        moco_state = torch.load(checkpoint_file, map_location='cpu')
    finally:
        # The weights are in memory from here on; never leave the download behind.
        if os.path.exists(checkpoint_file):
            os.remove(checkpoint_file)

    # Transfer moco weights
    print(colored('Transfer MoCo weights to model', 'blue'))
    new_state_dict = {}
    for k, weights in moco_state['state_dict'].items():
        # Copy backbone weights
        if k.startswith('module.encoder_q') and not k.startswith('module.encoder_q.fc'):
            new_k = 'module.backbone.' + k[len('module.encoder_q.'):]

        # Copy mlp weights
        elif k.startswith('module.encoder_q.fc'):
            new_k = 'module.contrastive_head.' + k[len('module.encoder_q.fc.'):]

        else:
            raise ValueError('Unexpected key {}'.format(k))

        new_state_dict[new_k] = weights

    model.load_state_dict(new_state_dict)

    # Save final model
    print(colored('Save pretext model', 'blue'))
    torch.save(model.module.state_dict(), p['pretext_model'])
    # In this case, we mine the neighbors before the MLP.
    model.module.contrastive_head = torch.nn.Identity()

    # Mine the topk nearest neighbors (Train)
    # These will be used for training with the SCAN-Loss.
    # The train dataloader built above already uses the validation
    # transforms, so it is reused as-is.
    topk = 50
    print(
        colored('Mine the nearest neighbors (Train)(Top-%d)' % (topk), 'blue'))
    fill_memory_bank(train_dataloader, model, memory_bank_train)
    indices, acc = memory_bank_train.mine_nearest_neighbors(topk)
    print('Accuracy of top-%d nearest neighbors on train set is %.2f' %
          (topk, 100 * acc))
    np.save(p['topk_neighbors_train_path'], indices)

    # Mine the topk nearest neighbors (Validation)
    # These will be used for validation.
    topk = 5
    print(colored('Mine the nearest neighbors (Val)(Top-%d)' % (topk), 'blue'))
    fill_memory_bank(val_dataloader, model, memory_bank_val)
    print('Mine the neighbors')
    indices, acc = memory_bank_val.mine_nearest_neighbors(topk)
    print('Accuracy of top-%d nearest neighbors on val set is %.2f' %
          (topk, 100 * acc))
    np.save(p['topk_neighbors_val_path'], indices)

예제 #3

파일 보기

def main():
    """Train and evaluate the similarity-prediction ("simpred") model.

    Builds the similarity datasets, model, optimizer and loss from the config,
    resumes from ``p['simpred_checkpoint']`` when that file exists, then for
    each epoch trains, evaluates on the validation set, keeps the weights with
    the highest accuracy in ``p['simpred_model']`` and writes a full
    checkpoint. At the end the best weights are reloaded and validation
    embeddings are written to TensorBoard.
    """
    args = FLAGS.parse_args()
    p = create_config(args.config_env, args.config_exp, args.tb_run)
    print(colored(p, 'red'))

    # CUDNN
    torch.backends.cudnn.benchmark = True

    # Data
    print(colored('Get dataset and dataloaders', 'blue'))
    train_transformations = get_train_transformations(p)
    val_transformations = get_val_transformations(p)
    train_dataset = get_train_dataset(p, train_transformations,
                                      split='train', to_similarity_dataset=True)
    val_dataset = get_val_dataset(p, val_transformations, to_similarity_dataset=True)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Train transforms:', train_transformations)
    print('Validation transforms:', val_transformations)
    print('Train samples %d - Val samples %d' % (len(train_dataset), len(val_dataset)))

    # Tensorboard writer
    writer = SummaryWriter(log_dir=p['simpred_tb_dir'])

    # Model
    print(colored('Get model', 'blue'))
    model = get_model(p, p['pretext_model'])
    print(model)
    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # Optimizer
    print(colored('Get optimizer', 'blue'))
    optimizer = get_optimizer(p, model, p['update_cluster_head_only'])
    print(optimizer)

    # Warning
    if p['update_cluster_head_only']:
        print(colored('WARNING: will only update the cluster head', 'red'))

    # Loss function
    print(colored('Get loss', 'blue'))
    criterion = get_criterion(p)
    criterion.cuda()
    print(criterion)

    # Checkpoint: resume the wrapped model, optimizer and bookkeeping if present
    if os.path.exists(p['simpred_checkpoint']):
        print(colored('Restart from checkpoint {}'.format(p['simpred_checkpoint']), 'blue'))
        checkpoint = torch.load(p['simpred_checkpoint'], map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']
        best_acc = checkpoint['best_acc']

    else:
        print(colored('No checkpoint file at {}'.format(p['simpred_checkpoint']), 'blue'))
        start_epoch = 0
        best_acc = 0

    # Main loop
    print(colored('Starting main loop', 'blue'))

    for epoch in range(start_epoch, p['epochs']):
        print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
        print(colored('-' * 15, 'yellow'))

        # Adjust lr
        lr = adjust_learning_rate(p, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Train
        print('Train ...')
        simpred_train(train_dataloader, model, criterion, optimizer, epoch, writer, p['update_cluster_head_only'])

        # Evaluate
        print('Make prediction on validation set ...')
        predictions = get_predictions(p, val_dataloader, model)

        print('Evaluate based on simpred loss ...')
        simpred_stats = simpred_evaluate(predictions, writer, epoch)
        print(simpred_stats)
        accuracy = simpred_stats['accuracy']

        if accuracy > best_acc:
            print('New highest accuracy on validation set: %.4f -> %.4f' % (best_acc, accuracy))
            best_acc = accuracy
            # Best weights are stored unwrapped (model.module), unlike the
            # resume checkpoint below, which stores the wrapped model.
            torch.save({'model': model.module.state_dict()}, p['simpred_model'])

        else:
            print('No new highest accuracy on validation set: %.4f -> %.4f' % (best_acc, accuracy))

        # Checkpoint
        print('Checkpoint ...')
        torch.save({'optimizer': optimizer.state_dict(), 'model': model.state_dict(),
                    'epoch': epoch + 1, 'best_acc': best_acc},
                   p['simpred_checkpoint'])

    # Evaluate and save the final model
    # NOTE(review): this assumes p['simpred_model'] exists, i.e. at least one
    # epoch (now or in an earlier run) improved on best_acc - confirm.
    print(colored('Evaluate best model based on simpred metric at the end', 'blue'))
    model_checkpoint = torch.load(p['simpred_model'], map_location='cpu')
    model.module.load_state_dict(model_checkpoint['model'])
    predictions, features, thumbnails = get_predictions(p, val_dataloader, model,
                                                        return_features=True, return_thumbnails=True)
    writer.add_embedding(features, predictions[0]['targets'], thumbnails, p['epochs'], p['simpred_tb_dir'])

    # Flush and release the event file so queued events are not lost at exit.
    writer.close()

예제 #4

파일 보기

def main():
    """Run the self-labeling stage: train, monitor on the validation set, checkpoint.

    Components are created from the config, training resumes from
    ``p['selflabel_checkpoint']`` when that file exists, and every epoch
    writes both a resumable checkpoint and a per-epoch copy of the unwrapped
    model weights. After the loop the model is evaluated once more (with a
    confusion matrix) and saved to ``p['selflabel_model']``.
    """
    # Configuration
    cfg = create_config(args.config_env, args.config_exp)
    print(colored(cfg, 'red'))

    # Network: print the bare module, then wrap it and move it to the GPU
    print(colored('Retrieve model', 'blue'))
    net = get_model(cfg, cfg['scan_model'])
    print(net)
    net = torch.nn.DataParallel(net).cuda()

    # Loss
    print(colored('Get loss', 'blue'))
    loss_fn = get_criterion(cfg)
    loss_fn.cuda()
    print(loss_fn)

    # cuDNN benchmark mode
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Optimizer
    print(colored('Retrieve optimizer', 'blue'))
    optimizer = get_optimizer(cfg, net)
    print(optimizer)

    # Data: the train set gets both pipelines, the val set only the plain one
    print(colored('Retrieve dataset', 'blue'))
    augment_tf = get_train_transformations(cfg)
    standard_tf = get_val_transformations(cfg)
    transform_map = {'standard': standard_tf, 'augment': augment_tf}
    train_dataset = get_train_dataset(cfg, transform_map, split='train',
                                      to_augmented_dataset=True)
    train_dataloader = get_train_dataloader(cfg, train_dataset)
    val_dataset = get_val_dataset(cfg, standard_tf)
    val_dataloader = get_val_dataloader(cfg, val_dataset)
    sample_counts = (len(train_dataset), len(val_dataset))
    print(colored('Train samples %d - Val samples %d' % sample_counts, 'yellow'))

    # Resume if a checkpoint is available
    resume_path = cfg['selflabel_checkpoint']
    if os.path.exists(resume_path):
        print(colored('Restart from checkpoint {}'.format(resume_path), 'blue'))
        state = torch.load(resume_path, map_location='cpu')
        net.load_state_dict(state['model'])
        optimizer.load_state_dict(state['optimizer'])
        first_epoch = state['epoch']
    else:
        print(colored('No checkpoint file at {}'.format(resume_path), 'blue'))
        first_epoch = 0

    # Optional EMA wrapper around the model
    ema = EMA(net, alpha=cfg['ema_alpha']) if cfg['use_ema'] else None

    # Main loop
    print(colored('Starting main loop', 'blue'))

    for epoch in range(first_epoch, cfg['epochs']):
        print(colored('Epoch %d/%d' % (epoch + 1, cfg['epochs']), 'yellow'))
        print(colored('-' * 10, 'yellow'))

        # Learning-rate schedule
        lr = adjust_learning_rate(cfg, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Self-labeling step
        print('Train ...')
        selflabel_train(train_dataloader, net, loss_fn, optimizer, epoch, ema=ema)

        # Progress monitoring only - not used for model selection
        print('Evaluate ...')
        predictions = get_predictions(cfg, val_dataloader, net)
        clustering_stats = hungarian_evaluate(0, predictions, compute_confusion_matrix=False)
        print(clustering_stats)

        # Resumable checkpoint plus a per-epoch copy of the unwrapped weights
        print('Checkpoint ...')
        resume_state = {'optimizer': optimizer.state_dict(),
                        'model': net.state_dict(),
                        'epoch': epoch + 1}
        torch.save(resume_state, cfg['selflabel_checkpoint'])
        epoch_weights_path = os.path.join(cfg['selflabel_dir'], 'model_%d.pth.tar' % (epoch))
        torch.save(net.module.state_dict(), epoch_weights_path)

    # Final evaluation (with confusion matrix) and final model
    print(colored('Evaluate model at the end', 'blue'))
    predictions = get_predictions(cfg, val_dataloader, net)
    matrix_path = os.path.join(cfg['selflabel_dir'], 'confusion_matrix.png')
    clustering_stats = hungarian_evaluate(0, predictions,
                                          class_names=val_dataset.classes,
                                          compute_confusion_matrix=True,
                                          confusion_matrix_file=matrix_path)
    print(clustering_stats)
    torch.save(net.module.state_dict(), cfg['selflabel_model'])

예제 #5

파일 보기

파일: simclr.py 프로젝트: acl21/init-pools-dal

def _mine_neighbors(dataloader, model, memory_bank, topk, split_name, save_path=None):
    """Fill ``memory_bank`` from ``dataloader``, mine top-k neighbors and report accuracy.

    ``split_name`` ('train' or 'val') only affects the log messages. The
    neighbor indices are written with ``np.save`` when ``save_path`` is given;
    otherwise they are discarded after the accuracy is printed.
    """
    print(colored('Fill memory bank for mining the nearest neighbors (%s) ...' % split_name, 'blue'))
    fill_memory_bank(dataloader, model, memory_bank)
    print('Mine the nearest neighbors (Top-%d)' % (topk))
    indices, acc = memory_bank.mine_nearest_neighbors(topk)
    print('Accuracy of top-%d nearest neighbors on %s set is %.2f' % (topk, split_name, 100 * acc))
    if save_path is not None:
        np.save(save_path, indices)


def main():
    """Run SimCLR pretext training with kNN monitoring, then mine nearest neighbors.

    Builds the model, datasets, memory banks, loss and optimizer from the
    config, resumes from ``p['pretext_checkpoint']`` when that file exists,
    and trains for ``p['epochs']`` epochs with a kNN evaluation and a
    checkpoint after each one. After training the model is saved to
    ``p['pretext_model']`` and the top-20 (train) / top-5 (val) neighbor
    indices are saved to the configured paths.
    """
    # Retrieve config file
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # Model
    print(colored('Retrieve model', 'blue'))
    model = get_model(p)
    print('Model is {}'.format(model.__class__.__name__))
    # Distinct loop name so the config `p` is not shadowed.
    n_params = sum(param.numel() for param in model.parameters())
    print('Model parameters: {:.2f}M'.format(n_params / 1e6))
    print(model)
    model = model.cuda()

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Dataset
    print(colored('Retrieve dataset', 'blue'))
    train_transforms = get_train_transformations(p)
    print('Train transforms:', train_transforms)
    val_transforms = get_val_transformations(p)
    print('Validation transforms:', val_transforms)
    train_dataset = get_train_dataset(p, train_transforms, to_augmented_dataset=True,
                                      split='train+unlabeled')  # Split is for stl-10
    val_dataset = get_val_dataset(p, val_transforms)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Dataset contains {}/{} train/val samples'.format(len(train_dataset), len(val_dataset)))

    # Memory Bank
    print(colored('Build MemoryBank', 'blue'))
    base_dataset = get_train_dataset(p, val_transforms, split='train')  # Dataset w/o augs for knn eval
    base_dataloader = get_val_dataloader(p, base_dataset)
    memory_bank_base = MemoryBank(len(base_dataset),
                                  p['model_kwargs']['features_dim'],
                                  p['num_classes'], p['criterion_kwargs']['temperature'])
    memory_bank_base.cuda()
    memory_bank_val = MemoryBank(len(val_dataset),
                                 p['model_kwargs']['features_dim'],
                                 p['num_classes'], p['criterion_kwargs']['temperature'])
    memory_bank_val.cuda()

    # Criterion
    print(colored('Retrieve criterion', 'blue'))
    criterion = get_criterion(p)
    print('Criterion is {}'.format(criterion.__class__.__name__))
    criterion = criterion.cuda()

    # Optimizer and scheduler
    print(colored('Retrieve optimizer', 'blue'))
    optimizer = get_optimizer(p, model)
    print(optimizer)

    # Checkpoint
    if os.path.exists(p['pretext_checkpoint']):
        print(colored('Restart from checkpoint {}'.format(p['pretext_checkpoint']), 'blue'))
        checkpoint = torch.load(p['pretext_checkpoint'], map_location='cpu')
        optimizer.load_state_dict(checkpoint['optimizer'])
        model.load_state_dict(checkpoint['model'])
        model.cuda()
        start_epoch = checkpoint['epoch']

    else:
        print(colored('No checkpoint file at {}'.format(p['pretext_checkpoint']), 'blue'))
        start_epoch = 0
        model = model.cuda()

    # Training
    print(colored('Starting main loop', 'blue'))
    for epoch in range(start_epoch, p['epochs']):
        # The epoch number is printed zero-based here.
        print(colored('Epoch %d/%d' % (epoch, p['epochs']), 'yellow'))
        print(colored('-' * 15, 'yellow'))

        # Adjust lr
        lr = adjust_learning_rate(p, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Train
        print('Train ...')
        simclr_train(train_dataloader, model, criterion, optimizer, epoch)

        # Fill memory bank
        print('Fill memory bank for kNN...')
        fill_memory_bank(base_dataloader, model, memory_bank_base)

        # Evaluate (To monitor progress - Not for validation)
        print('Evaluate ...')
        top1 = contrastive_evaluate(val_dataloader, model, memory_bank_base)
        print('Result of kNN evaluation is %.2f' % (top1))

        # Checkpoint
        print('Checkpoint ...')
        torch.save({'optimizer': optimizer.state_dict(), 'model': model.state_dict(),
                    'epoch': epoch + 1}, p['pretext_checkpoint'])

        # Intermediate report only: neighbor accuracy is printed at these
        # (zero-based) epochs but nothing is saved.
        if epoch in (50, 75):
            _mine_neighbors(base_dataloader, model, memory_bank_base, 20, 'train')
            _mine_neighbors(val_dataloader, model, memory_bank_val, 5, 'val')

    # Save final model
    torch.save(model.state_dict(), p['pretext_model'])

    # Mine the topk nearest neighbors at the very end (Train)
    # These will be served as input to the SCAN loss.
    _mine_neighbors(base_dataloader, model, memory_bank_base, 20, 'train',
                    save_path=p['topk_neighbors_train_path'])

    # Mine the topk nearest neighbors at the very end (Val)
    # These will be used for validation.
    _mine_neighbors(val_dataloader, model, memory_bank_val, 5, 'val',
                    save_path=p['topk_neighbors_val_path'])

예제 #6

파일 보기

파일: main.py 프로젝트: danielp3011/Multi-Task-Learning-PyTorch

def main():
    """Train a model, evaluating and checkpointing per epoch, then notify by e-mail.

    Standard output is redirected to a ``Logger`` writing ``log_file.txt`` in
    the output directory. Training resumes from ``p['checkpoint']`` when that
    file exists; otherwise the untrained model is evaluated once to obtain the
    initial ``best_result``. When ``eval_final_10_epochs_only`` is set in the
    config, evaluation runs only during the last ten epochs.
    """
    # OpenCV thread count is set to 0 before anything else runs
    cv2.setNumThreads(0)

    # Configuration and log redirection
    cfg = create_config(args.config_env, args.config_exp, args.save_name)
    log_path = os.path.join(cfg['output_dir'], 'log_file.txt')
    sys.stdout = Logger(log_path)
    print(colored(cfg, 'red'))

    # Network
    print(colored('Retrieve model', 'blue'))
    net = torch.nn.DataParallel(get_model(cfg)).cuda()

    # Loss
    print(colored('Get loss', 'blue'))
    loss_fn = get_criterion(cfg)
    loss_fn.cuda()
    print(loss_fn)

    # cuDNN benchmark mode
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Optimizer
    print(colored('Retrieve optimizer', 'blue'))
    optimizer = get_optimizer(cfg, net)
    print(optimizer)

    # Data
    print(colored('Retrieve dataset', 'blue'))
    train_transforms, val_transforms = get_transformations(cfg)
    train_dataset = get_train_dataset(cfg, train_transforms)
    val_dataset = get_val_dataset(cfg, val_transforms)
    # Validation dataset without transforms; built here but not used below.
    true_val_dataset = get_val_dataset(cfg, None)
    train_dataloader = get_train_dataloader(cfg, train_dataset)
    val_dataloader = get_val_dataloader(cfg, val_dataset)
    sample_counts = (len(train_dataset), len(val_dataset))
    print('Train samples %d - Val samples %d' % sample_counts)
    print('Train transformations:')
    print(train_transforms)
    print('Val transformations:')
    print(val_transforms)

    # Resume, or establish a baseline result with the untrained model
    resume_path = cfg['checkpoint']
    if os.path.exists(resume_path):
        print(colored('Restart from checkpoint {}'.format(resume_path), 'blue'))
        state = torch.load(resume_path, map_location='cpu')
        optimizer.load_state_dict(state['optimizer'])
        net.load_state_dict(state['model'])
        first_epoch = state['epoch']
        best_result = state['best_result']
    else:
        print(colored('No checkpoint file at {}'.format(resume_path), 'blue'))
        first_epoch = 0
        save_model_predictions(cfg, val_dataloader, net)
        best_result = eval_all_results(cfg)

    # Main loop
    print(colored('Starting main loop', 'blue'))

    for epoch in range(first_epoch, cfg['epochs']):
        print(colored('Epoch %d/%d' % (epoch + 1, cfg['epochs']), 'yellow'))
        print(colored('-' * 10, 'yellow'))

        # Learning-rate schedule
        lr = adjust_learning_rate(cfg, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Train
        print('Train ...')
        eval_train = train_vanilla(cfg, train_dataloader, net, loss_fn,
                                   optimizer, epoch)

        # To speed things up, evaluation can be limited to the final 10 epochs
        only_last_ten = ('eval_final_10_epochs_only' in cfg.keys()
                         and cfg['eval_final_10_epochs_only'])
        run_eval = (not only_last_ten) or (epoch + 1 > cfg['epochs'] - 10)

        if run_eval:
            print('Evaluate ...')
            save_model_predictions(cfg, val_dataloader, net)
            curr_result = eval_all_results(cfg)
            improves, best_result = validate_results(cfg, curr_result,
                                                     best_result)
            if improves:
                print('Save new best model')
                torch.save(net.state_dict(), cfg['best_model'])

        # Resumable checkpoint
        print('Checkpoint ...')
        resume_state = {
            'optimizer': optimizer.state_dict(),
            'model': net.state_dict(),
            'epoch': epoch + 1,
            'best_result': best_result,
        }
        torch.save(resume_state, cfg['checkpoint'])

    # Final evaluation
    # NOTE(review): despite the message, this reloads the last checkpoint,
    # not the weights stored at cfg['best_model'] - confirm which is intended.
    print(colored('Evaluating best model at the end', 'blue'))
    final_state = torch.load(cfg['checkpoint'])
    net.load_state_dict(final_state['model'])
    print("Model state dict keys: ", net.state_dict().keys())
    save_model_predictions(cfg, val_dataloader, net)
    eval_stats = eval_all_results(cfg)
    send_email(target_mail_address_list,
               server_name=server_name,
               exception_message="Success!",
               successfully=True)

예제 #7

파일 보기

파일: cluster_upd.py 프로젝트: ferna11i/scan_unsupervised

def main():
    """Load a pretext model and mine the top-5 nearest neighbors on the validation set.

    The model, datasets and memory banks are built from the config and moved
    to the module-level ``device``. Weights are restored from
    ``p['pretext_checkpoint']`` when that file exists; otherwise the model is
    used as constructed. The validation neighbor indices are saved to
    ``p['topk_neighbors_val_path']``. Neighbors for the training split are
    not mined here.
    """
    # Retrieve config file
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # Model
    print(colored('Retrieve model', 'green'))
    model = get_model(p)
    print('Model is {}'.format(model.__class__.__name__))
    # Distinct loop name so the config `p` is not shadowed.
    n_params = sum(param.numel() for param in model.parameters())
    print('Model parameters: {:.2f}M'.format(n_params / 1e6))
    print(model)
    model = model.to(device)

    # CUDNN
    print(colored('Set CuDNN benchmark', 'green'))
    torch.backends.cudnn.benchmark = True

    # Dataset
    print(colored('Retrieve dataset', 'green'))
    train_transforms = get_train_transformations(p)
    print('Train transforms:', train_transforms)
    val_transforms = get_val_transformations(p)
    print('Validation transforms:', val_transforms)
    train_dataset = get_train_dataset(p,
                                      train_transforms,
                                      to_augmented_dataset=True,
                                      split='train')  # Split is for stl-10
    val_dataset = get_val_dataset(p, val_transforms)
    # The train split goes through the validation dataloader factory here.
    train_dataloader = get_val_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Dataset contains {}/{} train/val samples'.format(
        len(train_dataset), len(val_dataset)))

    # Memory Bank
    print(colored('Build MemoryBank', 'green'))
    base_dataset = get_train_dataset(
        p, val_transforms, split='train')  # Dataset w/o augs for knn eval
    base_dataloader = get_val_dataloader(p, base_dataset)
    memory_bank_base = MemoryBank(len(base_dataset),
                                  p['model_kwargs']['features_dim'],
                                  p['num_classes'],
                                  p['criterion_kwargs']['temperature'])
    memory_bank_base.to(device)
    memory_bank_val = MemoryBank(len(val_dataset),
                                 p['model_kwargs']['features_dim'],
                                 p['num_classes'],
                                 p['criterion_kwargs']['temperature'])
    memory_bank_val.to(device)

    # Checkpoint: only the model weights are restored (no optimizer or epoch)
    if os.path.exists(p['pretext_checkpoint']):
        print(
            colored(
                'Restart from checkpoint {}'.format(p['pretext_checkpoint']),
                'green'))
        checkpoint = torch.load(p['pretext_checkpoint'], map_location='cpu')
        model.load_state_dict(checkpoint['model'])
    else:
        print(
            colored('No checkpoint file at {}'.format(p['pretext_checkpoint']),
                    'green'))
    model = model.to(device)

    # Mine the topk nearest neighbors at the very end (Val)
    # These will be used for validation.
    print(
        colored('Fill memory bank for mining the nearest neighbors (val) ...',
                'green'))
    fill_memory_bank(val_dataloader, model, memory_bank_val)
    topk = 5
    print('Mine the nearest neighbors (Top-%d)' % (topk))
    indices, acc = memory_bank_val.mine_nearest_neighbors(topk)
    print('Accuracy of top-%d nearest neighbors on val set is %.2f' %
          (topk, 100 * acc))
    np.save(p['topk_neighbors_val_path'], indices)

예제 #8

파일 보기

def main():
    """Load a locally stored MoCo v2 checkpoint into the model and save it as the pretext model.

    The model, datasets and memory banks are built from the config and moved
    to the module-level ``device``. The checkpoint is read from
    ``main_dir + model_dir``; its ``module.encoder_q.*`` parameters are
    renamed to ``backbone.*`` and its ``module.encoder_q.fc.*`` parameters to
    ``contrastive_head.*`` (no ``module.`` prefix, since the model is not
    wrapped in DataParallel). The weights are saved to ``p['pretext_model']``
    and the contrastive head is then replaced by an identity.

    Raises:
        ValueError: if the checkpoint holds a key outside ``module.encoder_q.``.
    """
    # Retrieve config file
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # Model
    print(colored('Retrieve model', 'green'))
    model = get_model(p)
    print('Model is {}'.format(model.__class__.__name__))
    print(model)
    model = model.to(device)

    # CUDNN
    print(colored('Set CuDNN benchmark', 'green'))
    torch.backends.cudnn.benchmark = True

    # Dataset (the train split deliberately uses the validation transforms
    # and the validation dataloader factory)
    print(colored('Retrieve dataset', 'green'))
    transforms = get_val_transformations(p)
    train_dataset = get_train_dataset(p, transforms)
    val_dataset = get_val_dataset(p, transforms)
    train_dataloader = get_val_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Dataset contains {}/{} train/val samples'.format(len(train_dataset), len(val_dataset)))

    # Memory Bank
    # NOTE(review): 2048 is presumably the backbone feature dimension - confirm.
    print(colored('Build MemoryBank', 'green'))
    memory_bank_train = MemoryBank(len(train_dataset), 2048, p['num_classes'], p['temperature'])
    memory_bank_train.to(device)
    memory_bank_val = MemoryBank(len(val_dataset), 2048, p['num_classes'], p['temperature'])
    memory_bank_val.to(device)

    # Load the official MoCoV2 checkpoint from local storage (no download here)
    print(colored('Downloading moco v2 checkpoint', 'green'))
    moco_state = torch.load(main_dir + model_dir + 'moco_v2_800ep_pretrain.pth.tar', map_location=device)

    # Transfer moco weights
    print(colored('Transfer MoCo weights to model', 'green'))
    encoder_prefix = 'module.encoder_q.'
    head_prefix = 'module.encoder_q.fc.'
    new_state_dict = {}
    for k, v in moco_state['state_dict'].items():
        # Test the head prefix first: it is a refinement of the encoder prefix.
        if k.startswith(head_prefix):
            new_k = 'contrastive_head.' + k[len(head_prefix):]
        elif k.startswith(encoder_prefix):
            new_k = 'backbone.' + k[len(encoder_prefix):]
        else:
            # Fail with the offending key instead of an opaque IndexError.
            raise ValueError('Unexpected key {}'.format(k))
        new_state_dict[new_k] = v

    model.load_state_dict(new_state_dict)

    # Save final model
    print(colored('Save pretext model', 'green'))
    torch.save(model.state_dict(), p['pretext_model'])
    # In this case, we mine the neighbors before the MLP.
    model.contrastive_head = Identity()

예제 #9

파일 보기

def main():
    """Run SCAN training on the training split only.

    The configuration is built from the command-line flags. Training resumes
    from ``scan_checkpoint`` when that file exists. The validation dataset is
    disabled in this variant: the "validation" dataloader wraps the training
    dataset, so the predictions computed after every epoch describe the
    training samples.
    """
    cli_args = FLAGS.parse_args()
    cfg = create_config(cli_args.config_env, cli_args.config_exp)
    print(colored(cfg, 'red'))

    # Enable the cuDNN benchmark mode.
    torch.backends.cudnn.benchmark = True

    # Data: only the training split is loaded (as a neighbors dataset).
    print(colored('Get dataset and dataloaders', 'blue'))
    train_tf = get_train_transformations(cfg)
    train_set = get_train_dataset(cfg, train_tf, split='train',
                                  to_neighbors_dataset=True)
    train_loader = get_train_dataloader(cfg, train_set)
    # Deliberate: the validation loader is fed with the training dataset.
    eval_loader = get_val_dataloader(cfg, train_set)
    print('Train transforms:', train_tf)

    # Model, wrapped for multi-GPU execution.
    print(colored('Get model', 'blue'))
    net = get_model(cfg, cfg['pretext_model'])
    print(net)
    net = torch.nn.DataParallel(net).cuda()

    # Optimizer
    print(colored('Get optimizer', 'blue'))
    optimizer = get_optimizer(cfg, net, cfg['update_cluster_head_only'])
    print(optimizer)

    if cfg['update_cluster_head_only']:
        print(colored('WARNING: SCAN will only update the cluster head', 'red'))

    # Loss function
    print(colored('Get loss', 'blue'))
    criterion = get_criterion(cfg)
    criterion.cuda()
    print(criterion)

    # Resume from the checkpoint if there is one, otherwise start fresh.
    ckpt_path = cfg['scan_checkpoint']
    if not os.path.exists(ckpt_path):
        print(colored('No checkpoint file at {}'.format(ckpt_path), 'blue'))
        start_epoch = 0
        best_loss = 1e4
        best_loss_head = None
    else:
        print(colored('Restart from checkpoint {}'.format(ckpt_path), 'blue'))
        state = torch.load(ckpt_path, map_location='cpu')
        net.load_state_dict(state['model'])
        optimizer.load_state_dict(state['optimizer'])
        start_epoch = state['epoch']
        best_loss = state['best_loss']
        best_loss_head = state['best_loss_head']

    # Main loop
    print(colored('Starting main loop', 'blue'))

    for epoch in range(start_epoch, cfg['epochs']):
        epoch_header = 'Epoch %d/%d' %(epoch+1, cfg['epochs'])
        print(colored(epoch_header, 'yellow'))
        print(colored('-'*15, 'yellow'))

        # Learning-rate schedule
        lr = adjust_learning_rate(cfg, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # One training epoch
        print('Train ...')
        scan_train(train_loader, net, criterion, optimizer, epoch,
                   cfg['update_cluster_head_only'])

        # No real evaluation yet: predictions are computed on the training
        # data (through eval_loader) to obtain the cluster assignments.
        print('Make prediction on validation set ...')
        predictions = get_predictions(cfg, eval_loader, net)

예제 #10

파일 보기

def main():
    """Train with ``umcl_train`` and evaluate the resulting clustering model.

    Steps, in order:
      1. Build the config from the command-line flags and create the
         train/val neighbor datasets and dataloaders.
      2. Create the model (initialised from ``p['pretext_model']``) and,
         when ``p['use_simpred_model']`` is set, a frozen simpred model.
      3. Resume from ``p['scan_checkpoint']`` if that file exists.
      4. For every epoch: train, evaluate, store the best model in
         ``p['scan_model']`` and write a checkpoint.
      5. Reload the best model, log embeddings to TensorBoard and compute the
         final clustering statistics, including a confusion matrix.

    The TensorBoard writer is closed at the end so that buffered events are
    flushed to disk.
    """
    args = FLAGS.parse_args()
    p = create_config(args.config_env, args.config_exp, args.tb_run)
    print(colored(p, 'red'))

    # CUDNN
    torch.backends.cudnn.benchmark = True

    # Data
    print(colored('Get dataset and dataloaders', 'blue'))
    train_transformations = get_train_transformations(p)
    val_transformations = get_val_transformations(p)
    # Negatives are used exactly when the simpred model is not.
    train_dataset = get_train_dataset(p,
                                      train_transformations,
                                      use_negatives=not p['use_simpred_model'],
                                      use_simpred=p['use_simpred_model'],
                                      split='train',
                                      to_neighbors_dataset=True)
    val_dataset = get_val_dataset(p,
                                  val_transformations,
                                  use_negatives=not p['use_simpred_model'],
                                  use_simpred=p['use_simpred_model'],
                                  to_neighbors_dataset=True)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Train transforms:', train_transformations)
    print('Validation transforms:', val_transformations)
    print('Train samples %d - Val samples %d' %
          (len(train_dataset), len(val_dataset)))

    # Tensorboard writer
    writer = SummaryWriter(log_dir=p['scan_tb_dir'])

    # Model
    print(colored('Get model', 'blue'))
    model = get_model(p, p['pretext_model'])
    print(model)
    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # Simpred Model (optional, kept frozen)
    if p['use_simpred_model']:
        print(colored('Get simpred model', 'blue'))
        simpred_model = get_model(p, p['simpred_model'], load_simpred=True)
        print(simpred_model)
        simpred_model = torch.nn.DataParallel(simpred_model)
        simpred_model = simpred_model.cuda()
        for param in simpred_model.parameters():
            param.requires_grad = False
    else:
        print('Not using simpred model')
        simpred_model = None

    # Optimizer
    print(colored('Get optimizer', 'blue'))
    optimizer = get_optimizer(p, model, p['update_cluster_head_only'])
    print(optimizer)

    # Warning
    if p['update_cluster_head_only']:
        print(colored('WARNING: SCAN will only update the cluster head',
                      'red'))

    # Loss function
    print(colored('Get loss', 'blue'))
    criterion = get_criterion(p)
    criterion.cuda()
    print(criterion)

    # Checkpoint
    if os.path.exists(p['scan_checkpoint']):
        print(
            colored('Restart from checkpoint {}'.format(p['scan_checkpoint']),
                    'blue'))
        checkpoint = torch.load(p['scan_checkpoint'], map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']
        best_acc = checkpoint['best_acc']
        best_acc_head = checkpoint['best_acc_head']

    else:
        print(
            colored('No checkpoint file at {}'.format(p['scan_checkpoint']),
                    'blue'))
        start_epoch = 0
        best_acc = 0
        best_acc_head = None

    # Main loop
    print(colored('Starting main loop', 'blue'))

    for epoch in range(start_epoch, p['epochs']):
        print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
        print(colored('-' * 15, 'yellow'))

        # Adjust lr
        lr = adjust_learning_rate(p, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Train
        print('Train ...')
        umcl_train(train_dataloader, model, simpred_model, criterion,
                   optimizer, epoch, writer, p['update_cluster_head_only'])

        # Evaluate
        print('Make prediction on validation set ...')
        predictions = get_predictions(p, val_dataloader, model)

        print('Evaluate based on similarity accuracy')
        stats = umcl_evaluate(p, val_dataloader, model, simpred_model)
        print(stats)
        highest_acc_head = stats['highest_acc_head']
        highest_acc = stats['highest_acc']

        if highest_acc > best_acc:
            print('New highest accuracy on validation set: %.4f -> %.4f' %
                  (best_acc, highest_acc))
            print('Highest accuracy head is %d' % highest_acc_head)
            best_acc = highest_acc
            best_acc_head = highest_acc_head
            # The best model is stored unwrapped (model.module), together
            # with the index of the head that achieved the accuracy.
            torch.save(
                {
                    'model': model.module.state_dict(),
                    'head': best_acc_head
                }, p['scan_model'])

        else:
            print('No new highest accuracy on validation set: %.4f -> %.4f' %
                  (best_acc, highest_acc))
            print('Highest accuracy head is %d' % highest_acc_head)

        print('Evaluate with hungarian matching algorithm ...')
        clustering_stats = hungarian_evaluate(highest_acc_head,
                                              predictions,
                                              compute_confusion_matrix=False,
                                              tf_writer=writer,
                                              epoch=epoch)
        print(clustering_stats)

        # Checkpoint (stores the DataParallel-wrapped state dict, matching
        # the load in the resume branch above)
        print('Checkpoint ...')
        torch.save(
            {
                'optimizer': optimizer.state_dict(),
                'model': model.state_dict(),
                'epoch': epoch + 1,
                'best_acc': best_acc,
                'best_acc_head': best_acc_head
            }, p['scan_checkpoint'])

    # Evaluate and save the final model
    print(
        colored('Evaluate best model based on similarity accuracy at the end',
                'blue'))
    model_checkpoint = torch.load(p['scan_model'], map_location='cpu')
    model.module.load_state_dict(model_checkpoint['model'])
    predictions, features, thumbnails = get_predictions(p,
                                                        val_dataloader,
                                                        model,
                                                        return_features=True,
                                                        return_thumbnails=True)
    writer.add_embedding(features, predictions[0]['targets'], thumbnails,
                         p['epochs'], p['scan_tb_dir'])
    clustering_stats = hungarian_evaluate(model_checkpoint['head'],
                                          predictions,
                                          class_names=val_dataset.classes,
                                          compute_confusion_matrix=True,
                                          confusion_matrix_file=os.path.join(
                                              p['scan_dir'],
                                              'confusion_matrix.png'))
    print(clustering_stats)

    # Flush pending TensorBoard events and release the event file.
    writer.close()

예제 #11

파일 보기

파일: main.py 프로젝트: yzxstore/Unsupervised-Semantic-Segmentation

def main_worker(gpu, ngpus_per_node, args):
    """Run contrastive training in one process of a distributed job.

    Args:
        gpu: Index of the GPU used by this process on the local node.
        ngpus_per_node: Number of GPUs (and worker processes) per node.
        args: Parsed command-line arguments. ``args.gpu`` and ``args.rank``
            are overwritten here; ``args.rank`` becomes the global rank.

    Side effects:
        * On every GPU except GPU 0, ``builtins.print`` is replaced by a
          no-op; on GPU 0, ``sys.stdout`` is redirected to a ``Logger``.
        * ``p['train_batch_size']`` and ``p['num_workers']`` are divided by
          the number of GPUs per node.
        * A checkpoint is written to ``p['checkpoint']`` after every epoch by
          the process whose rank is a multiple of ``ngpus_per_node``.
    """
    # Retrieve config file
    p = create_config(args.config_env, args.config_exp)

    # Check gpu id
    args.gpu = gpu
    p['gpu'] = gpu
    if args.gpu != 0:

        # Accept keyword arguments too, so calls such as print(x, end='')
        # do not raise TypeError on the silenced processes.
        def print_pass(*_args, **_kwargs):
            pass

        builtins.print = print_pass
    else:
        sys.stdout = Logger(os.path.join(p['output_dir'], 'log_file.txt'))

    if args.dist_url == "env://" and args.rank == -1:
        args.rank = int(os.environ["RANK"])

    # For multiprocessing distributed training, rank needs to be the
    # global rank among all the processes
    args.rank = args.rank * ngpus_per_node + gpu
    dist.init_process_group(backend=args.dist_backend,
                            init_method=args.dist_url,
                            world_size=args.world_size,
                            rank=args.rank)

    print('Python script is {}'.format(os.path.abspath(__file__)))
    print(colored(p, 'red'))

    # Get model
    print(colored('Retrieve model', 'blue'))
    model = ContrastiveModel(p)
    torch.cuda.set_device(args.gpu)
    model.cuda(args.gpu)

    # Optimizer
    print(colored('Retrieve optimizer', 'blue'))
    optimizer = get_optimizer(p, model.parameters())
    print(optimizer)

    # Nvidia-apex
    if args.nvidia_apex:
        print(colored('Using mixed precision training', 'blue'))
        from apex import amp
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O2",
                                          keep_batchnorm_fp32=True,
                                          loss_scale="dynamic")
    else:
        amp = None

    # When using a single GPU per process and per
    # DistributedDataParallel, we need to divide the batch size
    # ourselves based on the total number of GPUs we have
    p['train_batch_size'] = int(p['train_batch_size'] / ngpus_per_node)
    # Ceiling division, so no configured worker is lost to rounding.
    p['num_workers'] = int(
        (p['num_workers'] + ngpus_per_node - 1) / ngpus_per_node)
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[args.gpu], find_unused_parameters=True)

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Dataset
    print(colored('Retrieve dataset', 'blue'))

    # Transforms are applied by DatasetKeyQuery, not by the base dataset.
    train_transform = get_train_transformations()
    print(train_transform)
    train_dataset = DatasetKeyQuery(
        get_train_dataset(p, transform=None),
        train_transform,
        downsample_sal=not p['model_kwargs']['upsample'])
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset)
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=p['train_batch_size'],
        # A sampler is always set here, so this evaluates to False; the
        # sampler is responsible for shuffling.
        shuffle=(train_sampler is None),
        num_workers=p['num_workers'],
        pin_memory=True,
        sampler=train_sampler,
        drop_last=True,
        collate_fn=collate_custom)
    print(colored('Train samples %d' % (len(train_dataset)), 'yellow'))
    print(colored(train_dataset, 'yellow'))

    # Resume from checkpoint
    if os.path.exists(p['checkpoint']):
        print(
            colored('Restart from checkpoint {}'.format(p['checkpoint']),
                    'blue'))
        loc = 'cuda:{}'.format(args.gpu)
        checkpoint = torch.load(p['checkpoint'], map_location=loc)
        optimizer.load_state_dict(checkpoint['optimizer'])
        model.load_state_dict(checkpoint['model'])
        if args.nvidia_apex:
            amp.load_state_dict(checkpoint['amp'])
        start_epoch = checkpoint['epoch']

    else:
        print(
            colored('No checkpoint file at {}'.format(p['checkpoint']),
                    'blue'))
        start_epoch = 0
        model = model.cuda()

    # Main loop
    print(colored('Starting main loop', 'blue'))

    for epoch in range(start_epoch, p['epochs']):
        print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
        print(colored('-' * 10, 'yellow'))

        # DistributedSampler derives its shuffle order from the epoch
        # number; without this call every epoch iterates the data in the
        # same order.
        train_sampler.set_epoch(epoch)

        # Adjust lr
        lr = adjust_learning_rate(p, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Train
        print('Train ...')
        train(p, train_dataloader, model, optimizer, epoch, amp)

        # Checkpoint (only one process per node writes the file)
        if args.rank % ngpus_per_node == 0:
            print('Checkpoint ...')
            state = {
                'optimizer': optimizer.state_dict(),
                'model': model.state_dict(),
                'epoch': epoch + 1
            }
            if args.nvidia_apex:
                # The resume branch restores this entry via
                # amp.load_state_dict.
                state['amp'] = amp.state_dict()
            torch.save(state, p['checkpoint'])

예제 #12

파일 보기

파일: linearprobe.py 프로젝트: mgwillia/Unsupervised-Classification

def main():
    """Train and/or evaluate a linear probe on top of a pretext model.

    The weights in ``p['pretext_model']`` are loaded non-strictly into the
    model. When ``args.mode == 'train'`` the probe is trained, resuming from
    ``p['linearprobe_checkpoint']`` if that file exists; every fifth epoch
    the model is evaluated, the best model (lowest loss) is written to
    ``p['linearprobe_model']`` and a checkpoint is saved. In every mode the
    best model is finally reloaded and evaluated on the validation set.
    """
    args = FLAGS.parse_args()
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # CUDNN
    torch.backends.cudnn.benchmark = True

    # Data
    print(colored('Get dataset and dataloaders', 'blue'))
    train_transformations = get_train_transformations(p)
    val_transformations = get_val_transformations(p)
    print('Train transforms:', train_transformations)
    print('Validation transforms:', val_transformations)
    train_dataset = get_train_dataset(p, train_transformations, split='train')
    val_dataset = get_val_dataset(p, val_transformations)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Train samples %d - Val samples %d' %
          (len(train_dataset), len(val_dataset)))

    # Model
    print(colored('Get model', 'blue'))
    model = get_model(p)
    print(model)

    # Optimizer
    print(colored('Get optimizer', 'blue'))
    optimizer = get_optimizer(p, model, p['update_cluster_head_only'])
    print(optimizer)

    # Warning
    if p['update_cluster_head_only']:
        print(
            colored(
                'WARNING: Linear probing will only update the cluster head',
                'red'))

    # Loss function
    print(colored('Get loss', 'blue'))
    criterion = get_criterion(p)
    criterion.cuda()
    print(criterion)

    # The pretext weights are loaded after wrapping in DataParallel and with
    # strict=False; the mismatching keys are printed for inspection.
    model = torch.nn.DataParallel(model)
    model = model.cuda()
    state = torch.load(p['pretext_model'], map_location='cpu')
    missing = model.load_state_dict(state, strict=False)
    print('missing components', missing)

    if args.mode == 'train':
        # Checkpoint
        if os.path.exists(p['linearprobe_checkpoint']):
            print(
                colored(
                    'Restart from checkpoint {}'.format(
                        p['linearprobe_checkpoint']), 'blue'))
            checkpoint = torch.load(p['linearprobe_checkpoint'],
                                    map_location='cpu')
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']

        else:
            print(
                colored(
                    'No checkpoint file at {}'.format(
                        p['linearprobe_checkpoint']), 'blue'))
            start_epoch = 0
            best_loss = 1e4

        # Main loop
        print(colored('Starting main loop', 'blue'))

        for epoch in range(start_epoch, p['epochs']):
            print(colored('Epoch %d/%d' % (epoch + 1, p['epochs']), 'yellow'))
            print(colored('-' * 15, 'yellow'))

            # Adjust lr
            lr = adjust_learning_rate(p, optimizer, epoch)
            print('Adjusted learning rate to {:.5f}'.format(lr))

            # Train
            print('Train ...')
            linearprobe_train(train_dataloader, model, criterion, optimizer,
                              epoch)

            # Evaluation and checkpointing only happen every fifth epoch.
            if (epoch + 1) % 5 == 0:
                print('Evaluate based on CE loss ...')
                linearprobe_stats = linearprobe_evaluate(
                    val_dataloader, model, criterion)
                loss = linearprobe_stats['loss']
                if loss < best_loss:
                    best_loss = loss
                    torch.save({'model': model.module.state_dict()},
                               p['linearprobe_model'])

                # Checkpoint
                print('Checkpoint ...')
                print(linearprobe_stats)
                torch.save(
                    {
                        'optimizer': optimizer.state_dict(),
                        'model': model.state_dict(),
                        'epoch': epoch + 1,
                        # Store the best loss so far, not the latest one:
                        # the resume branch reads this value back as
                        # best_loss, and a worse latest loss would let a
                        # resumed run overwrite the best model.
                        'best_loss': best_loss
                    }, p['linearprobe_checkpoint'])

    # Evaluate and save the final model
    print(colored('Evaluate best model', 'blue'))
    model_checkpoint = torch.load(p['linearprobe_model'], map_location='cpu')
    model.module.load_state_dict(model_checkpoint['model'])
    linearprobe_stats = linearprobe_evaluate(val_dataloader, model, criterion)
    print(linearprobe_stats)
    print('Final Accuracy:', linearprobe_stats['accuracy'])

예제 #13

파일 보기

def main():
    """Fine-tune only the final decoder layer of a segmentation model.

    Every parameter except ``decoder.4.weight`` and ``decoder.4.bias`` is
    frozen. Training resumes from ``checkpoint`` when the file exists. After
    each epoch the model is evaluated online and the weights with the best
    mIoU are written to ``best_model``. At the end the best weights are
    reloaded, predictions are saved to disk and evaluated offline on the
    validation dataset that has no transform applied.
    """
    cv2.setNumThreads(1)

    # Configuration and logging
    cfg = create_config(args.config_env, args.config_exp)
    sys.stdout = Logger(cfg['log_file'])
    print('Python script is {}'.format(os.path.abspath(__file__)))
    print(colored(cfg, 'red'))

    # Model
    print(colored('Retrieve model', 'blue'))
    model = get_model(cfg)
    print(model)
    model = model.cuda()

    # Only the two parameters listed below stay trainable.
    trainable_names = ['decoder.4.weight', 'decoder.4.bias']
    for name, param in model.named_parameters():
        if name in trainable_names:
            continue
        param.requires_grad = False

    # Criterion
    print(colored('Get loss', 'blue'))
    criterion = torch.nn.CrossEntropyLoss(ignore_index=255)
    criterion.cuda()
    print(criterion)

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Optimizer over the parameters that still require gradients
    print(colored('Retrieve optimizer', 'blue'))
    parameters = [w for w in model.parameters() if w.requires_grad]
    assert len(parameters) == 2  # decoder.4.weight, decoder.4.bias
    optimizer = get_optimizer(cfg, parameters)
    print(optimizer)

    # Datasets and dataloaders
    print(colored('Retrieve dataset', 'blue'))
    train_tf = get_train_transformations()
    val_tf = get_val_transformations()
    train_set = get_train_dataset(cfg, train_tf)
    val_set = get_val_dataset(cfg, val_tf)
    # Validation data without any transform, used for the offline evaluation.
    true_val_dataset = get_val_dataset(cfg, None)
    train_loader = get_train_dataloader(cfg, train_set)
    val_loader = get_val_dataloader(cfg, val_set)
    sample_counts = 'Train samples %d - Val samples %d' % (len(train_set),
                                                           len(val_set))
    print(colored(sample_counts, 'yellow'))

    # Resume from checkpoint, or initialise the bookkeeping from scratch
    ckpt_path = cfg['checkpoint']
    if os.path.exists(ckpt_path):
        print(colored('Restart from checkpoint {}'.format(ckpt_path), 'blue'))
        state = torch.load(ckpt_path, map_location='cpu')
        optimizer.load_state_dict(state['optimizer'])
        model.load_state_dict(state['model'])
        model.cuda()
        start_epoch = state['epoch']
        best_epoch = state['best_epoch']
        best_iou = state['best_iou']

    else:
        print(colored('No checkpoint file at {}'.format(ckpt_path), 'blue'))
        start_epoch = 0
        best_epoch = 0
        best_iou = 0
        model = model.cuda()

    # Main loop
    print(colored('Starting main loop', 'blue'))

    for epoch in range(start_epoch, cfg['epochs']):
        print(colored('Epoch %d/%d' % (epoch + 1, cfg['epochs']), 'yellow'))
        print(colored('-' * 10, 'yellow'))

        # Learning-rate schedule
        lr = adjust_learning_rate(cfg, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Train
        print('Train ...')
        eval_train = train_segmentation_vanilla(
            cfg, train_loader, model, criterion, optimizer, epoch,
            freeze_batchnorm=cfg['freeze_batchnorm'])

        # Online evaluation: batched, every image resized to the same
        # resolution.
        print('Evaluate ...')
        eval_val = eval_segmentation_supervised_online(cfg, val_loader, model)
        current_iou = eval_val['mIoU']
        if current_iou > best_iou:
            print('Found new best model: %.2f -> %.2f (mIoU)' %
                  (100 * best_iou, 100 * current_iou))
            best_iou = current_iou
            best_epoch = epoch
            torch.save(model.state_dict(), cfg['best_model'])

        else:
            print('No new best model: %.2f -> %.2f (mIoU)' %
                  (100 * best_iou, 100 * current_iou))
            print('Last best model was found in epoch %d' % (best_epoch))

        # Checkpoint
        print('Checkpoint ...')
        snapshot = {
            'optimizer': optimizer.state_dict(),
            'model': model.state_dict(),
            'epoch': epoch + 1,
            'best_epoch': best_epoch,
            'best_iou': best_iou
        }
        torch.save(snapshot, cfg['checkpoint'])

    # Final evaluation of the best model on the original resolution
    print(colored('Evaluating best model at the end', 'blue'))
    best_weights = torch.load(cfg['best_model'])
    model.load_state_dict(best_weights)
    save_results_to_disk(cfg, val_loader, model,
                         crf_postprocess=args.crf_postprocess)
    eval_stats = eval_segmentation_supervised_offline(cfg, true_val_dataset,
                                                      verbose=True)

예제 #14

파일 보기

파일: dataset.py 프로젝트: yzxstore/Unsupervised-Semantic-Segmentation

            ) / query_sample['sal'].numel()

            if key_area < self.max_area and key_area > self.min_area and query_area < self.max_area and query_area > self.min_area:  # Ok. Foreground/Background has proper ratio.
                return {'key': key_sample, 'query': query_sample}

            else:
                count += 1  # Try again. Areas of foreground/background to small.


if __name__ == '__main__':
    # Visual sanity check: plot key/query crops and their saliency masks.
    import numpy as np
    from matplotlib import pyplot as plt
    from utils.common_config import get_train_dataset, get_train_transformations

    # Per-channel constants used to undo the image normalisation.
    channel_std = np.array([0.229, 0.224, 0.225])
    channel_mean = np.array([0.485, 0.456, 0.406])

    def _to_displayable(image_tensor):
        # Move channels last, undo the normalisation and scale to 0-255.
        channels_last = np.transpose(image_tensor.numpy(), (1, 2, 0))
        restored = 255 * (channels_last * channel_std + channel_mean)
        return restored.astype(np.uint8)

    p = {'train_db_name': 'VOCSegmentation', 'overfit': False}
    transform = get_train_transformations('strong')
    # The base dataset is created without a transform; DatasetKeyQuery
    # receives the transform instead.
    base_dataset = get_train_dataset(p, transform=None)
    dataset = DatasetKeyQuery(base_dataset, transform, downsample_sal=False)

    for sample in dataset:
        _, axes = plt.subplots(4)
        key_view = _to_displayable(sample['key']['image'])
        query_view = _to_displayable(sample['query']['image'])
        sal_query = sample['query']['sal']
        sal_key = sample['key']['sal']
        axes[0].imshow(key_view)
        axes[1].imshow(query_view)
        axes[2].imshow(sal_key)

예제 #15

파일 보기

def main():
    """Mine nearest neighbors with a pretext model loaded from a checkpoint.

    The weights in ``pretext_checkpoint`` (which must exist) are loaded into
    the model and re-saved to ``pretext_model``. Two memory banks are then
    filled, one for the training split (using the validation transforms) and
    one for the validation split. The top-20 (train) and top-5 (val) nearest
    neighbor indices are written to ``topk_neighbors_train_path`` and
    ``topk_neighbors_val_path``.
    """

    # Retrieve config file
    cfg = create_config(args.config_env, args.config_exp)
    print(colored(cfg, 'red'))

    # Model
    print(colored('Retrieve model', 'blue'))
    model = get_model(cfg)
    print('Model is {}'.format(model.__class__.__name__))
    n_params = sum(w.numel() for w in model.parameters())
    print('Model parameters: {:.2f}M'.format(n_params / 1e6))
    print(model)
    model = model.cuda()

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Validation data
    val_tf = get_val_transformations(cfg)
    print('Validation transforms:', val_tf)
    val_set = get_val_dataset(cfg, val_tf)
    val_loader = get_val_dataloader(cfg, val_set)
    print('Dataset contains {} val samples'.format(len(val_set)))

    # Memory banks. The training split is loaded with the validation
    # transforms (no augmentations) and served through the val dataloader.
    print(colored('Build MemoryBank', 'blue'))
    base_set = get_train_dataset(cfg, val_tf, split='train')
    base_loader = get_val_dataloader(cfg, base_set)
    bank_base = MemoryBank(len(base_set),
                           cfg['model_kwargs']['features_dim'],
                           cfg['num_classes'],
                           cfg['criterion_kwargs']['temperature'])
    bank_base.cuda()
    bank_val = MemoryBank(len(val_set),
                          cfg['model_kwargs']['features_dim'],
                          cfg['num_classes'],
                          cfg['criterion_kwargs']['temperature'])
    bank_val.cuda()

    # Load the pretext checkpoint (required)
    ckpt_path = cfg['pretext_checkpoint']
    assert os.path.exists(ckpt_path)
    print(colored('Restart from checkpoint {}'.format(ckpt_path), 'blue'))
    weights = torch.load(ckpt_path, map_location='cpu')
    model.load_state_dict(weights)
    model.cuda()

    # Save model
    torch.save(model.state_dict(), cfg['pretext_model'])

    # Train split: mine the top-20 neighbors. These serve as input to the
    # SCAN loss.
    train_banner = 'Fill memory bank for mining the nearest neighbors (train) ...'
    print(colored(train_banner, 'blue'))
    fill_memory_bank(base_loader, model, bank_base)
    train_k = 20
    print('Mine the nearest neighbors (Top-%d)' % (train_k))
    train_indices, train_acc = bank_base.mine_nearest_neighbors(train_k)
    print('Accuracy of top-%d nearest neighbors on train set is %.2f' %
          (train_k, 100 * train_acc))
    np.save(cfg['topk_neighbors_train_path'], train_indices)

    # Val split: mine the top-5 neighbors. These are used for validation.
    val_banner = 'Fill memory bank for mining the nearest neighbors (val) ...'
    print(colored(val_banner, 'blue'))
    fill_memory_bank(val_loader, model, bank_val)
    val_k = 5
    print('Mine the nearest neighbors (Top-%d)' % (val_k))
    val_indices, val_acc = bank_val.mine_nearest_neighbors(val_k)
    print('Accuracy of top-%d nearest neighbors on val set is %.2f' %
          (val_k, 100 * val_acc))
    np.save(cfg['topk_neighbors_val_path'], val_indices)

예제 #16

파일 보기

def main():
    """Train (optionally) and evaluate a model with the SCAN-F pipeline.

    Datasets are loaded as neighbors/strangers datasets. When
    ``args.mode == 'train'`` the model is trained, resuming from
    ``scanf_checkpoint`` if present; after every epoch the model with the
    lowest validation loss is stored in ``scanf_model`` and a checkpoint is
    written. In every mode the best model is finally reloaded, its targets
    and cluster predictions are saved to the working directory, and the
    clustering is evaluated with a confusion matrix.
    """
    cli_args = FLAGS.parse_args()
    cfg = create_config(cli_args.config_env, cli_args.config_exp)
    print(colored(cfg, 'red'))

    # CUDNN
    torch.backends.cudnn.benchmark = True

    # Data
    print(colored('Get dataset and dataloaders', 'blue'))
    train_tf = get_train_transformations(cfg)
    val_tf = get_val_transformations(cfg)
    train_set = get_train_dataset(cfg, train_tf, split='train',
                                  to_neighbors_strangers_dataset=True)
    val_set = get_val_dataset(cfg, val_tf,
                              to_neighbors_strangers_dataset=True)
    train_loader = get_train_dataloader(cfg, train_set)
    val_loader = get_val_dataloader(cfg, val_set)
    print('Train transforms:', train_tf)
    print('Validation transforms:', val_tf)
    print('Train samples %d - Val samples %d' %(len(train_set), len(val_set)))

    # Model, wrapped for multi-GPU execution
    print(colored('Get model', 'blue'))
    net = get_model(cfg, cfg['pretext_model'])
    print(net)
    net = torch.nn.DataParallel(net).cuda()

    # Optimizer
    print(colored('Get optimizer', 'blue'))
    optimizer = get_optimizer(cfg, net, cfg['update_cluster_head_only'])
    print(optimizer)

    if cfg['update_cluster_head_only']:
        print(colored('WARNING: SCAN will only update the cluster head', 'red'))

    # Loss function
    print(colored('Get loss', 'blue'))
    criterion = get_criterion(cfg)
    criterion.cuda()
    print(criterion)

    if cli_args.mode == 'train':
        # Resume from the checkpoint if there is one, otherwise start fresh.
        ckpt_path = cfg['scanf_checkpoint']
        if not os.path.exists(ckpt_path):
            print(colored('No checkpoint file at {}'.format(ckpt_path), 'blue'))
            start_epoch = 0
            best_loss = 1e4
            best_loss_head = None
        else:
            print(colored('Restart from checkpoint {}'.format(ckpt_path), 'blue'))
            state = torch.load(ckpt_path, map_location='cpu')
            net.load_state_dict(state['model'])
            optimizer.load_state_dict(state['optimizer'])
            start_epoch = state['epoch']
            best_loss = state['best_loss']
            best_loss_head = state['best_loss_head']

        # Main loop
        print(colored('Starting main loop', 'blue'))

        for epoch in range(start_epoch, cfg['epochs']):
            print(colored('Epoch %d/%d' %(epoch+1, cfg['epochs']), 'yellow'))
            print(colored('-'*15, 'yellow'))

            # Learning-rate schedule
            lr = adjust_learning_rate(cfg, optimizer, epoch)
            print('Adjusted learning rate to {:.5f}'.format(lr))

            # One training epoch
            print('Train ...')
            scanf_train(train_loader, net, criterion, optimizer, epoch,
                        cfg['update_cluster_head_only'])

            # Evaluate
            print('Make prediction on validation set ...')
            predictions = get_predictions(cfg, val_loader, net)

            print('Evaluate based on SCAN loss ...')
            scanf_stats = scanf_evaluate(predictions)
            print(scanf_stats)
            lowest_loss_head = scanf_stats['lowest_loss_head']
            lowest_loss = scanf_stats['lowest_loss']

            if lowest_loss < best_loss:
                print('New lowest loss on validation set: %.4f -> %.4f' %(best_loss, lowest_loss))
                print('Lowest loss head is %d' %(lowest_loss_head))
                best_loss = lowest_loss
                best_loss_head = lowest_loss_head
                # The best model is stored unwrapped, together with its head.
                best_state = {'model': net.module.state_dict(),
                              'head': best_loss_head}
                torch.save(best_state, cfg['scanf_model'])

            else:
                print('No new lowest loss on validation set: %.4f -> %.4f' %(best_loss, lowest_loss))
                print('Lowest loss head is %d' %(best_loss_head))

            print('Evaluate with hungarian matching algorithm ...')
            clustering_stats = hungarian_evaluate(
                lowest_loss_head, predictions, compute_confusion_matrix=False)
            print(clustering_stats)

            # Checkpoint
            print('Checkpoint ...')
            snapshot = {
                'optimizer': optimizer.state_dict(),
                'model': net.state_dict(),
                'epoch': epoch + 1,
                'best_loss': best_loss,
                'best_loss_head': best_loss_head,
            }
            torch.save(snapshot, cfg['scanf_checkpoint'])

    # Evaluate and save the final model
    print(colored('Evaluate best model based on SCAN metric at the end', 'blue'))
    model_checkpoint = torch.load(cfg['scanf_model'], map_location='cpu')
    net.module.load_state_dict(model_checkpoint['model'])
    predictions = get_predictions(cfg, val_loader, net)

    # Targets and cluster predictions of the best head are dumped to the
    # current working directory.
    best_head_output = predictions[model_checkpoint['head']]
    gt_targets = best_head_output['targets']
    cluster_predictions = best_head_output['predictions']
    print(gt_targets.shape)
    print(cluster_predictions.shape)
    torch.save(gt_targets, 'scanf_gt_targets.pth.tar')
    torch.save(cluster_predictions, 'scanf_cluster_predictions.pth.tar')

    confusion_file = os.path.join(cfg['scanf_dir'], 'confusion_matrix.png')
    clustering_stats = hungarian_evaluate(
        model_checkpoint['head'], predictions,
        class_names=val_set.dataset.classes,
        compute_confusion_matrix=True,
        confusion_matrix_file=confusion_file)
    print(clustering_stats)
    print('Final Accuracy:', clustering_stats['ACC'])

예제 #17

파일 보기

파일: attributes_fine_tune.py 프로젝트: ShakedDovrat/Unsupervised-Classification

def main(args):
    """Fine-tune a pretext-trained model with an extra linear attribute head.

    The function builds the model, datasets, dataloaders and criterion from
    the config, loads the weights stored at ``p['pretext_checkpoint']``,
    freezes every parameter that exists at that point, and then wraps
    ``model.contrastive_head`` in an ``nn.Sequential`` followed by a new
    ``nn.Linear(features_dim, num_attribute_classes)`` layer. It then trains
    for ``p['epochs']`` epochs, evaluating on the validation loader and
    writing a checkpoint after each epoch, and finally saves the model
    ``state_dict`` to ``p['pretext_fine_tune_model']``.

    Args:
        args: Object exposing ``config_env`` and ``config_exp`` attributes,
            which are forwarded to ``create_config``.

    Raises:
        FileNotFoundError: If ``p['pretext_checkpoint']`` does not exist.
    """
    # Retrieve config file
    p = create_config(args.config_env, args.config_exp)
    print(colored(p, 'red'))

    # Model
    print(colored('Retrieve model', 'blue'))
    model = get_model(p)
    print('Model is {}'.format(model.__class__.__name__))
    # Use a distinct loop name so the config dict `p` is not visually shadowed.
    num_params = sum(param.numel() for param in model.parameters())
    print('Model parameters: {:.2f}M'.format(num_params / 1e6))
    print(model)
    model = model.cuda()

    # CUDNN
    print(colored('Set CuDNN benchmark', 'blue'))
    torch.backends.cudnn.benchmark = True

    # Dataset
    print(colored('Retrieve dataset', 'blue'))
    train_transforms = get_train_transformations(p)
    print('Train transforms:', train_transforms)
    val_transforms = get_val_transformations(p)
    print('Validation transforms:', val_transforms)
    train_dataset = get_train_dataset(p, train_transforms, to_augmented_dataset=True,
                                      split='train+unlabeled')  # Split is for stl-10
    val_dataset = get_val_dataset(p, val_transforms)
    train_dataloader = get_train_dataloader(p, train_dataset)
    val_dataloader = get_val_dataloader(p, val_dataset)
    print('Dataset contains {}/{} train/val samples'.format(len(train_dataset), len(val_dataset)))

    # Criterion
    print(colored('Retrieve criterion', 'blue'))
    criterion = get_criterion(p)
    print('Criterion is {}'.format(criterion.__class__.__name__))
    criterion = criterion.cuda()

    # Checkpoint
    # Explicit check instead of `assert`: asserts are stripped under `python -O`,
    # which would let a missing file fail later inside torch.load.
    checkpoint_path = p['pretext_checkpoint']
    if not os.path.exists(checkpoint_path):
        raise FileNotFoundError("Checkpoint not found - can't fine-tune.")
    print(colored('Restart from checkpoint {}'.format(checkpoint_path), 'blue'))
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    # Only the model weights are restored. The optimizer state and the epoch
    # counter stored in the checkpoint are deliberately ignored, so
    # fine-tuning always starts from epoch 0. load_state_dict copies into the
    # existing (already CUDA) parameters, so no extra .cuda() call is needed.
    model.load_state_dict(checkpoint['model'])
    start_epoch = 0

    # Train linear model from representations to evaluate attributes classification
    print(colored('Train linear', 'blue'))

    # Freeze everything that was loaded from the checkpoint ...
    for parameter in model.parameters():
        parameter.requires_grad = False
    # ... then append a fresh linear layer after the existing contrastive head.
    # It is created after the freeze loop, so its parameters keep the default
    # requires_grad=True.
    model.contrastive_head = nn.Sequential(
        model.contrastive_head,
        nn.Linear(p['model_kwargs']['features_dim'], p['num_attribute_classes']),
    )
    # The new nn.Linear is allocated on the CPU; move it to the GPU as well.
    model.cuda()

    # Optimizer and scheduler
    print(colored('Retrieve optimizer', 'blue'))
    optimizer = get_optimizer(p, model)
    print(optimizer)

    # Training
    print(colored('Starting main loop', 'blue'))
    for epoch in range(start_epoch, p['epochs']):
        print(colored('Epoch %d/%d' % (epoch, p['epochs']), 'yellow'))
        print(colored('-' * 15, 'yellow'))

        # Adjust lr
        lr = adjust_learning_rate(p, optimizer, epoch)
        print('Adjusted learning rate to {:.5f}'.format(lr))

        # Train
        print('Train ...')
        simclr_fine_tune_train(train_dataloader, model, criterion, optimizer, epoch)

        # Evaluate
        acc = attributes_evaluate(val_dataloader, model)
        print('Val set accuracy %.2f' % acc)

        # Checkpoint (overwritten every epoch; stores the next epoch index)
        print('Checkpoint ...')
        torch.save({'optimizer': optimizer.state_dict(), 'model': model.state_dict(),
                    'epoch': epoch + 1}, p['pretext_fine_tune_checkpoint'])

    # Save final model
    torch.save(model.state_dict(), p['pretext_fine_tune_model'])