def few_shot_loop(options):
    """Run few-shot evaluation episodes with a pretrained encoder.

    Builds the model and an episode loader according to ``options.model``,
    extracts features for each episode, scores them with the configured
    testing strategy, and prints the mean score with its confidence interval.

    Args:
        options: parsed argument namespace; mutated in place
            (``options.cuda_device`` is set here).

    Returns:
        None.

    Raises:
        NotImplementedError: if ``options.model`` has no few-shot path.
    """
    Print = get_printer(options)
    # In --no_distributed / single GPU mode, the GPU id may not be the local rank
    options.cuda_device = f"cuda:{get_gpu_ids()[0]}"
    # Distributed setup: one merged branch (the process-group init needs
    # `gpus`, which only exists in distributed mode anyway).
    if options.distributed:
        gpus = get_gpu_ids()
        options.cuda_device = f"cuda:{options.local_rank}"
        torch.cuda.set_device(options.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method="env://",
                                             world_size=len(gpus),
                                             rank=options.local_rank)

    # MoCo and SelfLabel share the same construction path; SimCLR restores
    # the old training options and builds the episode strategy from them.
    if options.model in ('MoCoModel', "SelfLabelModel"):
        model = get_model(options)
        episode_loader = getattr(
            episode_strat,
            options.episode_strat)(options).episode_loader(options)
    elif options.model == "SimCLRModel":
        model, old_opts = get_old_state(options)
        episode_loader = getattr(
            episode_strat,
            options.episode_strat)(old_opts).episode_loader(options)
    else:
        raise NotImplementedError(
            f"Few Shot on {options.model} not implemented")

    score_track = AverageMeter()
    time_track = AverageMeter()
    model.eval()
    # Attaches the sklearn.pipeline.Pipeline applied to network outputs
    # (result is stored on `options` by the helper; return value unused).
    get_pre_classifier_pipeline(options, model)
    classifier = getattr(testing_strat, options.testing_strat)

    for full_data, full_labels in episode_loader:
        start_time = time.time()
        full_data = model(full_data.to(options.cuda_device))
        score = classifier(options, full_data, full_labels)
        score_track.accumulate(score)
        time_track.accumulate(time.time() - start_time)

    # Mean score and confidence half-width, reported as percentages.
    m, h = score_track.conf()
    Print(f"({time_track.latest():.3f}s avg / {time_track.total():.3f}s) "
          f"{m*100:.4f} \u00b1 {h*100:.4f}")
    return
def train_loop(options):
    """Self-supervised training loop with periodic k-means evaluation.

    Trains the model on paired augmentations, tracks the best (lowest-loss)
    model state, fits a mean-only ``StandardScaler`` over the training
    features of the best model, and saves options (+ optionally weights)
    to ``options.save_path``.

    Args:
        options: parsed argument namespace; mutated in place
            (``cuda_device``, ``shuffle``, ``train_scaler``, ``log_file``).

    Returns:
        None.
    """
    import copy  # local: used to snapshot the best model state

    Print = get_printer(options)
    Print(options)
    Save = get_func_on_master(torch.save, options)

    # Distributed setup.
    gpus = get_gpu_ids()
    options.cuda_device = f"cuda:{options.local_rank}"
    torch.cuda.set_device(options.local_rank)
    if options.distributed:
        torch.distributed.init_process_group(backend='nccl',
                                             init_method="env://",
                                             world_size=len(gpus),
                                             rank=options.local_rank)

    model = get_model(options)

    dataset = getattr(datasets, options.dataset)(options)
    if options.use_trainval:
        train_loader = get_loader(dataset.trainval_set, options)
    else:
        train_loader = get_loader(dataset.train_set, options)
    num_train_classes = len(train_loader.dataset.classes)

    # Switch off for validation and testing
    options.shuffle = False
    plain_train_loader = get_loader(dataset.plain_train_set, options)
    test_loader = get_loader(dataset.test_set, options)
    num_test_classes = len(test_loader.dataset.classes)
    valid_loader = get_loader(dataset.valid_set, options)
    num_valid_classes = len(valid_loader.dataset.classes)

    criterion = getattr(losses, options.loss_function)(options)
    final_optimizer = get_optimizer(model, options)
    scheduler = get_scheduler(final_optimizer, options)

    time_track = AverageMeter()
    # BUGFIX: state_dict() returns live references to the parameter tensors;
    # without a deep copy the "best" snapshot silently tracks the current
    # weights and the saved model is just the final state.
    best_model_state = copy.deepcopy(model.state_dict())

    loss_val = Value(1e6, min, name="loss")
    loss_printer = ValuePrinter()
    loss_printer.track(loss_val)

    test_eval = Value(-1e6, max, name="test_acc")
    val_eval = Value(-1e6, max, name="val_acc")
    eval_printer = ValuePrinter()
    eval_printer.track(val_eval)
    eval_printer.track(test_eval)

    Print((f"Starting Training on:\n"
           f"Train: {num_train_classes:>3d} classes\n"
           f"Valid: {num_valid_classes:>3d} classes\n"
           f"Test: {num_test_classes:>3d} classes"))
    Print("-" * 18)

    for epoch in range(options.num_epochs):
        model.train()
        epoch_loss_track = AverageMeter()
        epoch_start = time.time()
        for aug1, aug2, _ in train_loader:
            final_optimizer.zero_grad()
            feat1 = model(aug1.to(device=options.cuda_device))
            feat2 = model(aug2.to(device=options.cuda_device))
            loss = criterion(feat1, feat2)
            loss.backward()
            final_optimizer.step()
            epoch_loss_track.accumulate(loss.item())
        scheduler.step()
        time_track.accumulate(time.time() - epoch_start)

        loss_val.update(epoch_loss_track.value())
        Print(
            f"({time_track.latest():>7.3f}s) Epoch {epoch+1:0>3}/{options.num_epochs:>3}:",
            end='')
        Print(loss_printer.get_formatted_line(), end='')

        if loss_val.current_is_best:
            # Deep copy so later epochs cannot mutate the snapshot.
            best_model_state = copy.deepcopy(model.state_dict())

        # Periodic clustering evaluation on the master process only.
        if options.local_rank == 0 and epoch % options.eval_freq == options.eval_freq - 1:
            eval_start = time.time()
            model.eval()
            val_eval.update(kmeans_on_data(model, valid_loader, options))
            test_eval.update(kmeans_on_data(model, test_loader, options))
            model.train()
            eval_time = time.time() - eval_start
            Print(f" ({eval_time:>7.3f}s) ", end='')
            Print(eval_printer.get_formatted_line(), end='')
        Print()

    Print((
        f"Training for {options.num_epochs} epochs took {time_track.total():.3f}s total "
        f"and {time_track.value():.3f}s average"))

    Print(
        "Calculating mean of transformed dataset using the best model state ...",
        end='')
    # since this is what will be saved later
    model.load_state_dict(best_model_state)
    model.eval()
    scaler = StandardScaler(copy=False, with_std=False)
    mean_time = time.time()
    # Pure forward pass: disable autograd so no graph is built per batch.
    with torch.no_grad():
        for data, _ in plain_train_loader:
            feat = model(data.to(device=options.cuda_device)).cpu().numpy()
            scaler.partial_fit(feat)
    mean_time = time.time() - mean_time
    Print(f" {mean_time:.3f}s")

    options.train_scaler = scaler
    # Replace the open file handle with its path so options can be pickled.
    options.log_file = options.log_file.name
    Print(f"Saving best model and options to {options.save_path}")
    # NOTE(review): key is 'option' (singular) here but 'options' in
    # fine_tune's save dict — presumably get_old_state expects this exact
    # key; confirm before unifying.
    save_dict = {'option': options}
    if options.save_model:
        save_dict['model_state_dict'] = best_model_state
    Save(save_dict, options.save_path)
def fine_tune(options):
    """Fine-tune a pretrained encoder with a small classification head.

    Loads the old model state, trains a two-layer classifier head on a
    subsample of the original training set, evaluates accuracy on the full
    set once as a sanity check, and saves the (backbone) model state.

    Args:
        options: parsed argument namespace (``batch_size``,
            ``fine_tune_epochs``, ``cuda_device``, ``save_path``, ...).

    Returns:
        float: mean per-batch accuracy over the full training set.
    """
    # get print and save functions
    Print = get_printer(options)
    Save = get_func_on_master(torch.save, options)

    # Restore pretrained backbone and the options it was trained with.
    model, old_opts = get_old_state(options)

    # Subsample the old training set for fine-tuning; keep a full loader
    # for the final sanity evaluation.
    dataset = getattr(datasets, old_opts.dataset)(old_opts).plain_train_set
    indices = choose_indices(options, dataset)
    loader = torch.utils.data.DataLoader(
        torch.utils.data.Subset(dataset, indices),
        batch_size=options.batch_size)
    full_loader = torch.utils.data.DataLoader(
        dataset, batch_size=options.batch_size)

    # Classifier head: projection_dim -> midpoint -> num_classes, log-probs.
    num_classes = len(dataset.classes)
    intermediate_dim = int((num_classes + old_opts.projection_dim) / 2)
    full_model = torch.nn.Sequential(
        model,
        torch.nn.Linear(old_opts.projection_dim, intermediate_dim),
        torch.nn.ReLU(inplace=True),
        torch.nn.Linear(intermediate_dim, num_classes),
        torch.nn.LogSoftmax(dim=1)).to(device=options.cuda_device)

    # NLLLoss pairs with the LogSoftmax output above.
    criterion = torch.nn.NLLLoss()
    optimizer = get_optimizer(full_model, options)
    scheduler = get_scheduler(optimizer, options)

    full_model.train()
    # pretty printer for loss
    loss_val = Value(1e6, min, name="loss")
    loss_printer = ValuePrinter()
    loss_printer.track(loss_val)
    timer = AverageMeter()

    for epoch in range(options.fine_tune_epochs):
        t = time.time()
        epoch_loss = AverageMeter()
        for data, labels in loader:
            Print('.', end='')  # progress dot per batch
            optimizer.zero_grad()
            out = full_model(data.to(device=options.cuda_device))
            loss = criterion(out, labels.to(device=options.cuda_device))
            loss.backward()
            optimizer.step()
            epoch_loss.accumulate(loss.item())
        scheduler.step()
        loss_val.update(epoch_loss.value())
        timer.accumulate(time.time() - t)
        Print(
            f" ({timer.latest():>6.2f}s) epoch {epoch+1:>3}/{options.fine_tune_epochs:>3}:"
            f"{loss_printer.get_formatted_line()}")

    Print(
        f"Fine tuning: {timer.total():.3f}s {options.fine_tune_epochs} epochs / {timer.value():.3f}s avg"
    )

    # evaluate on train set once for sanity
    full_model.eval()
    acc = AverageMeter()
    # Inference only: no_grad avoids building autograd graphs per batch.
    # NOTE(review): this averages per-batch accuracy, so a smaller final
    # batch is weighted the same as full batches — fine for a sanity check.
    with torch.no_grad():
        for data, labels in full_loader:
            predicts = full_model(data.to(device=options.cuda_device))
            predicts = predicts.argmax(dim=1)
            labels = labels.to(device=options.cuda_device)
            acc.accumulate((predicts == labels).sum().item() /
                           predicts.size(0))

    Print(
        f"Saving old options, model state, and base path to {options.save_path}"
    )
    # NOTE(review): key 'options' here vs 'option' in train_loop's save
    # dict — confirm which one downstream loaders expect before unifying.
    Save(
        {
            'options': old_opts,
            'model_state_dict': model.state_dict(),
            'loaded_from': options.load_from.name
        }, options.save_path)
    Print(acc.value())
    return acc.value()