def generate_permute_data(permute):
    # dimZ, dimY, dimX and lms_clean_root are expected from the enclosing scope
    input = torch.randint(0, 10000, (dimZ, dimY, dimX))
    output = input.permute(permute[0], permute[1], permute[2])
    printer = get_printer(
        lms_clean_root,
        test_name=f"permute3D_{permute[0]}{permute[1]}{permute[2]}")
    printer("input.data", input)
    printer("output.data", output)
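# Hypothetical driver (not in the original source): enumerate every 3-D
# permutation and generate a data file for each. It assumes dimX, dimY, dimZ
# and lms_clean_root are defined at module scope, as generate_permute_data
# above expects.
def generate_all_permute_data():
    import itertools
    for permute in itertools.permutations(range(3)):
        generate_permute_data(list(permute))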
def few_shot_loop(options):
    Print = get_printer(options)
    # In --no_distributed / single GPU mode, the GPU id may not be the local rank
    options.cuda_device = f"cuda:{get_gpu_ids()[0]}"
    # distributed stuff
    if options.distributed:
        gpus = get_gpu_ids()
        options.cuda_device = f"cuda:{options.local_rank}"
        torch.cuda.set_device(options.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method="env://",
                                             world_size=len(gpus),
                                             rank=options.local_rank)
    if options.model == 'MoCoModel':
        model = get_model(options)
        episode_loader = getattr(
            episode_strat, options.episode_strat)(options).episode_loader(options)
    elif options.model == "SelfLabelModel":
        model = get_model(options)
        episode_loader = getattr(
            episode_strat, options.episode_strat)(options).episode_loader(options)
    elif options.model == "SimCLRModel":
        model, old_opts = get_old_state(options)
        episode_loader = getattr(
            episode_strat, options.episode_strat)(old_opts).episode_loader(options)
    else:
        raise NotImplementedError(
            f"Few Shot on {options.model} not implemented")

    score_track = AverageMeter()
    time_track = AverageMeter()
    model.eval()
    # define sklearn.pipeline.Pipeline to be applied to network outputs
    get_pre_classifier_pipeline(options, model)
    classifier = getattr(testing_strat, options.testing_strat)
    for full_data, full_labels in episode_loader:
        start_time = time.time()
        full_data = model(full_data.to(options.cuda_device))
        score = classifier(options, full_data, full_labels)
        score_track.accumulate(score)
        time_track.accumulate(time.time() - start_time)
    m, h = score_track.conf()
    Print(f"({time_track.latest():.3f}s avg / {time_track.total():.3f}s) "
          f"{m*100:.4f} \u00b1 {h*100:.4f}")
    return
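# Plausible implementation of AverageMeter.conf() (an assumption: the real one
# is not shown here): mean of the accumulated scores plus a 95% confidence
# half-width, matching the "mean ± h" printout in few_shot_loop above.
def conf_interval(scores, confidence=0.95):
    import numpy as np
    import scipy.stats
    a = np.asarray(scores, dtype=np.float64)
    m = a.mean()
    h = scipy.stats.sem(a) * scipy.stats.t.ppf((1 + confidence) / 2, len(a) - 1)
    return m, h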
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    input0 = torch.randn(2, 3, 3)
    input1 = torch.randn(2, 3, 5)
    output = torch.cat((input0, input1), 2)
    # printer
    printer = get_printer(lms_clean_root, test_name="concat2")
    printer("input0.data", input0)
    printer("input1.data", input1)
    printer("output.data", output)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    input = torch.randn(214, 56)
    input.requires_grad = True
    loss = torch.transpose(input, 0, 1)
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="transpose")
    printer("input", input, dim=0, degree=2)
    printer("loss", loss, dim=1, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    dimX = 600
    dimY = 60
    dimZ = 40
    input = torch.randint(0, 10000, (dimZ, dimY, dimX))
    output = input.permute(1, 0, 2)
    # printer
    printer = get_printer(lms_clean_root, test_name="permute_kernel_102_big")
    printer("input.data", input)
    printer("output.data", output)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    input = torch.randn(32, 32)
    input.requires_grad = True
    splits = torch.split(input, 16, dim=1)
    loss = splits[0]
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="split_small")
    printer("input", input, dim=0, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    input = torch.randn(32, 32)
    input.requires_grad = True
    m = nn.Tanh()
    loss = m(input)
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="tanh")
    printer("input", input, dim=0, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    dimX = 8
    dimY = 2
    dimZ = 8
    input = torch.randn(dimZ, dimY, dimX)
    input.requires_grad = True
    loss = input.permute(1, 0, 2)
    loss.sum().backward()
    printer = get_printer(lms_clean_root, test_name="permute_102_big")
    printer("input", input, dim=0, degree=2)
    printer("loss", loss, dim=1, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    d0 = 8
    d1 = 8
    input = torch.randn(d0, d1)
    input.requires_grad = True
    mask = torch.randint(0, 2, (d0, d1))
    # masked_fill requires a boolean mask, so convert the 0/1 tensor
    output = input.masked_fill(mask.bool(), 0.0)
    output.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="maskedFill")
    printer("input.data", input)
    printer("mask.data", mask)
    printer("output.data", output)
    printer("input_grad.data", input.grad)
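# Sanity-check sketch (hypothetical helper, not part of the generator): for
# output = input.masked_fill(mask, 0.0) followed by output.sum().backward(),
# the gradient w.r.t. input is 0 wherever mask is True and 1 elsewhere.
def check_masked_fill_grad():
    torch.manual_seed(0)
    input = torch.randn(8, 8, requires_grad=True)
    mask = torch.randint(0, 2, (8, 8), dtype=torch.bool)
    input.masked_fill(mask, 0.0).sum().backward()
    expected = torch.where(mask, torch.zeros(8, 8), torch.ones(8, 8))
    assert torch.equal(input.grad, expected)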
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    dimX = 6
    dimY = 5
    dimZ = 12
    permute = [1, 2, 0]
    input = torch.randn(dimZ, dimY, dimX)
    input.requires_grad = True
    loss = input.permute(permute[0], permute[1], permute[2])
    loss.sum().backward()
    printer = get_printer(
        lms_clean_root,
        test_name=f"permute_{permute[0]}{permute[1]}{permute[2]}")
    printer("input", input, dim=0, degree=2)
    printer("loss", loss, dim=2, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    input = torch.randn(2, 1, 3, 3)
    input.requires_grad = True
    weight = torch.randn(2, 1, 3, 3)
    weight.requires_grad = True
    loss = input + torch.sigmoid(weight)
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="sigmoid")
    printer("input", input, dim=0, degree=2)
    printer("weight", weight, dim=0, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("weight_grad", weight.grad, dim=0, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    input = torch.randn(32, 32)
    input.requires_grad = True
    weight = torch.randn(32, 32)
    weight.requires_grad = True
    loss = input + torch.ones(32, 32) / weight
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="invert")
    printer("input", input, dim=0, degree=2)
    printer("weight", weight, dim=0, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("weight_grad", weight.grad, dim=0, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    size = 32
    input = torch.randn(size, size)
    input.requires_grad = True
    weight = torch.randn(size, size)
    weight.requires_grad = True
    loss = input - weight
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="negate")
    printer("input", input, dim=0, degree=2)
    printer("weight", weight, dim=0, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("weight_grad", weight.grad, dim=0, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    input = torch.randn(32, 32, 8)
    input.requires_grad = True
    weight = torch.randn(32, 32, 32)
    weight.requires_grad = True
    splits = torch.split(weight, 8, dim=2)
    loss = input * splits[0]
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="split2")
    printer("input", input, dim=0, degree=2)
    printer("weight", weight, dim=0, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("weight_grad", weight.grad, dim=0, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    temp = torch.tensor(list(range(0, 2 * 3 * 3)), dtype=torch.float32)
    input = torch.ones(2, 1, 3, 3)
    input.requires_grad = True
    weight = torch.reshape(temp, (2, 1, 3, 3))
    weight.requires_grad = True
    m = torch.nn.Dropout(p=0.0)
    loss = input + m(weight)
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="dropout")
    printer("input", input, dim=0, degree=2)
    printer("weight", weight, dim=0, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("weight_grad", weight.grad, dim=0, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # compute softmax forward and backward
    shape = (20, 782)
    input = torch.randn(shape)
    input.requires_grad = True
    m = torch.nn.Softmax(dim=1)
    output = m(input)
    output.retain_grad()
    w = torch.nn.Linear(shape[1], 1)
    y = w(output)
    y.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="softmax")
    printer("input.data", input)
    printer("output.data", output)
    printer("input_grad.data", input.grad)
    printer("output_grad.data", output.grad)
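# Verification sketch (hypothetical, not part of the test generator): softmax
# backward follows dL/dx = y * (dL/dy - sum(dL/dy * y)) with the sum taken
# over the softmax dimension. This checks autograd against that closed form.
def check_softmax_backward():
    torch.manual_seed(0)
    x = torch.randn(20, 782, requires_grad=True)
    y = torch.nn.Softmax(dim=1)(x)
    g = torch.randn_like(y)  # arbitrary upstream gradient dL/dy
    y.backward(g)
    manual = y * (g - (g * y).sum(dim=1, keepdim=True))
    assert torch.allclose(x.grad, manual, atol=1e-6)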
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    d0 = 2
    d1 = 2
    d2 = 4
    input = torch.randn(d0, d1, d2)
    output0, output1 = torch.split(input, 2, 2)
    output0_flat = output0.reshape((d0 * d1 * 2))
    output1_flat = output1.reshape((d0 * d1 * 2))
    output = torch.cat((output0_flat, output1_flat), 0)
    # printer
    printer = get_printer(lms_clean_root, test_name="split2")
    printer("input.data", input)
    printer("output0.data", output0)
    printer("output1.data", output1)
    printer("output.data", output)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    input = torch.zeros(2, 1, 9, 9)
    input.requires_grad = True
    temp = torch.tensor(list(range(0, 2 * 9 * 9)), dtype=torch.float32)
    weight = torch.reshape(temp, (2, 1, 9, 9))
    weight.requires_grad = True
    m = torch.nn.MaxPool2d(kernel_size=3, padding=1, stride=1)
    loss = input + m(weight)
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="maxpool")
    printer("input", input, dim=0, degree=2)
    printer("weight", weight, dim=0, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("weight_grad", weight.grad, dim=0, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    input = torch.randn(2, 1, 9, 9)
    input.requires_grad = True
    weight = torch.randn(2, 1, 9, 9)
    weight.requires_grad = True
    mask = torch.randint(0, 2, (2, 1, 9, 9))
    # masked_fill requires a boolean mask, so convert the 0/1 tensor
    loss = input + weight.masked_fill(mask.bool(), 1.0)
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="maskedFill")
    printer("input", input, dim=0, degree=2)
    printer("weight", weight, dim=0, degree=2)
    printer("mask", mask, dim=0, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("weight_grad", weight.grad, dim=0, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # compute log-softmax forward and backward
    shape = (2, 1, 32, 533)
    input = torch.randn(shape)
    input.requires_grad = True
    weight = torch.randn(shape)
    weight.requires_grad = True
    m = torch.nn.LogSoftmax(dim=3)
    loss = input + m(weight)
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="logSoftmax")
    printer("input", input, dim=0, degree=2)
    printer("weight", weight, dim=0, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("weight_grad", weight.grad, dim=0, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    input = torch.randn(2, 1, 9, 9)
    input.requires_grad = True
    weight = torch.randn(2, 1, 3, 3)
    weight.requires_grad = True
    loss = F.conv2d(input, weight, stride=(1, 1), padding=(1, 1), dilation=(1, 1))
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="conv")
    printer("input", input, dim=0, degree=2)
    printer("weight", weight, dim=-1, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("weight_grad", weight.grad, dim=-1, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
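# Shape sketch (hypothetical check, not from the source): conv2d's output
# spatial size is floor((H + 2*pad - dil*(k-1) - 1) / stride) + 1. With H=9,
# k=3, pad=1, dil=1, stride=1 the 9x9 size is preserved, and the channel
# dimension becomes weight.shape[0] = 2.
def check_conv_shape():
    input = torch.randn(2, 1, 9, 9)
    weight = torch.randn(2, 1, 3, 3)
    out = F.conv2d(input, weight, stride=(1, 1), padding=(1, 1), dilation=(1, 1))
    assert out.shape == (2, 2, 9, 9)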
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    n_embedding = 20
    embed_size = 60
    embed = nn.Embedding(n_embedding, embed_size)
    n_indices = 10
    indices = torch.randint(0, n_embedding, (n_indices, ))
    loss = embed(indices)
    loss.retain_grad()
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="embedding")
    int_printer = get_int_printer(lms_clean_root, test_name="embedding")
    printer("embed", embed.weight, dim=-1, degree=2)
    int_printer("indices", indices, dim=0, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("embed_grad", embed.weight.grad, dim=-1, degree=2)
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    n_embedding = 20
    embed_size = 60
    embedding = nn.Embedding(n_embedding, embed_size)
    n_indices = 10
    indices = torch.randint(0, n_embedding, (n_indices, ))
    output = embedding(indices)
    output.retain_grad()
    output.mean().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="embedding")
    printer("embedding.data", embedding.weight)
    printer("indices.data", indices)
    printer("output.data", output)
    printer("embedding_grad.data", embedding.weight.grad)
    printer("output_grad.data", output.grad)
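# Gradient sketch (hypothetical check, not in the original): for an embedding
# lookup followed by mean().backward(), each occurrence of index i adds
# 1/output.numel() to every element of weight row i, so row i's gradient is
# count(i) / output.numel() in every column.
def check_embedding_grad():
    torch.manual_seed(0)
    embedding = nn.Embedding(20, 60)
    indices = torch.randint(0, 20, (10, ))
    embedding(indices).mean().backward()
    counts = torch.bincount(indices, minlength=20).float()
    expected = (counts / (10 * 60)).unsqueeze(1).expand(20, 60)
    assert torch.allclose(embedding.weight.grad, expected)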
def generate_data(lms_clean_root: str):
    torch.manual_seed(0)
    # model
    input = torch.zeros(2, 1, 3, 3)
    input.requires_grad = True
    zeros = [1] * (2 * 3 * 3)
    zeros[0] = 0
    temp = torch.tensor(zeros, dtype=torch.float32)
    weight = torch.reshape(temp, (2, 1, 3, 3))
    weight.requires_grad = True
    m = torch.nn.Softmax(dim=0)
    loss = input + m(weight)
    loss.sum().backward()
    # printer
    printer = get_printer(lms_clean_root, test_name="softmax")
    printer("input", input, dim=0, degree=2)
    printer("weight", weight, dim=0, degree=2)
    printer("loss", loss, dim=0, degree=2)
    printer("weight_grad", weight.grad, dim=0, degree=2)
    printer("input_grad", input.grad, dim=0, degree=2)
def fine_tune(options):
    # get print and save functions
    Print = get_printer(options)
    Save = get_func_on_master(torch.save, options)
    # get old_options
    model, old_opts = get_old_state(options)
    # subsample old dataset
    dataset = getattr(datasets, old_opts.dataset)(old_opts).plain_train_set
    indices = choose_indices(options, dataset)
    loader = torch.utils.data.DataLoader(
        torch.utils.data.Subset(dataset, indices),
        batch_size=options.batch_size)
    full_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=options.batch_size)
    # complete model
    num_classes = len(dataset.classes)
    intermediate_dim = int((num_classes + old_opts.projection_dim) / 2)
    full_model = torch.nn.Sequential(
        model, torch.nn.Linear(old_opts.projection_dim, intermediate_dim),
        torch.nn.ReLU(inplace=True),
        torch.nn.Linear(intermediate_dim, num_classes),
        torch.nn.LogSoftmax(dim=1)).to(device=options.cuda_device)
    # get loss
    criterion = torch.nn.NLLLoss()
    optimizer = get_optimizer(full_model, options)
    scheduler = get_scheduler(optimizer, options)
    # train for num_epochs
    full_model.train()
    # pretty printer for loss
    loss_val = Value(1e6, min, name="loss")
    loss_printer = ValuePrinter()
    loss_printer.track(loss_val)
    timer = AverageMeter()
    for epoch in range(options.fine_tune_epochs):
        t = time.time()
        epoch_loss = AverageMeter()
        for data, labels in loader:
            Print('.', end='')
            optimizer.zero_grad()
            out = full_model(data.to(device=options.cuda_device))
            loss = criterion(out, labels.to(device=options.cuda_device))
            loss.backward()
            optimizer.step()
            epoch_loss.accumulate(loss.item())
        scheduler.step()
        loss_val.update(epoch_loss.value())
        timer.accumulate(time.time() - t)
        Print(
            f" ({timer.latest():>6.2f}s) epoch {epoch+1:>3}/{options.fine_tune_epochs:>3}:"
            f"{loss_printer.get_formatted_line()}")
    Print(
        f"Fine tuning: {timer.total():.3f}s {options.fine_tune_epochs} epochs / {timer.value():.3f}s avg"
    )
    # evaluate on train set once for sanity
    full_model.eval()
    acc = AverageMeter()
    for data, labels in full_loader:
        predicts = full_model(data.to(device=options.cuda_device))
        predicts = predicts.argmax(dim=1)
        labels = labels.to(device=options.cuda_device)
        acc.accumulate((predicts == labels).sum().item() / predicts.size(0))
    Print(
        f"Saving old options, model state, and base path to {options.save_path}"
    )
    Save(
        {
            'options': old_opts,
            'model_state_dict': model.state_dict(),
            'loaded_from': options.load_from.name
        }, options.save_path)
    Print(acc.value())
    return acc.value()
def printer_get(printer_name):
    if not utils.printer_exists(printer_name):
        return abort(404)
    return jsonify(utils.get_printer(printer_name))
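# Route-registration sketch (hypothetical; only the view function appears in
# the original): with Flask, printer_get could be wired to a URL like this.
def make_printer_app():
    from flask import Flask
    app = Flask(__name__)
    app.add_url_rule("/printers/<printer_name>", view_func=printer_get)
    return app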
def train_loop(options):
    Print = get_printer(options)
    Print(options)
    Save = get_func_on_master(torch.save, options)
    # distributed stuff
    gpus = get_gpu_ids()
    options.cuda_device = f"cuda:{options.local_rank}"
    torch.cuda.set_device(options.local_rank)
    if options.distributed:
        torch.distributed.init_process_group(backend='nccl',
                                             init_method="env://",
                                             world_size=len(gpus),
                                             rank=options.local_rank)
    model = get_model(options)
    # Print(model)
    dataset = getattr(datasets, options.dataset)(options)
    if options.use_trainval:
        train_loader = get_loader(dataset.trainval_set, options)
    else:
        train_loader = get_loader(dataset.train_set, options)
    num_train_classes = len(train_loader.dataset.classes)
    # Switch off for validation and testing
    options.shuffle = False
    plain_train_loader = get_loader(dataset.plain_train_set, options)
    test_loader = get_loader(dataset.test_set, options)
    num_test_classes = len(test_loader.dataset.classes)
    valid_loader = get_loader(dataset.valid_set, options)
    num_valid_classes = len(valid_loader.dataset.classes)
    criterion = getattr(losses, options.loss_function)(options)
    final_optimizer = get_optimizer(model, options)
    scheduler = get_scheduler(final_optimizer, options)
    time_track = AverageMeter()
    best_model_state = model.state_dict()
    loss_val = Value(1e6, min, name="loss")
    loss_printer = ValuePrinter()
    loss_printer.track(loss_val)
    test_eval = Value(-1e6, max, name="test_acc")
    val_eval = Value(-1e6, max, name="val_acc")
    # train_eval = Value(-1e6, max, name="train_acc")
    eval_printer = ValuePrinter()
    # eval_printer.track(train_eval)
    eval_printer.track(val_eval)
    eval_printer.track(test_eval)
    Print((f"Starting Training on:\n"
           f"Train: {num_train_classes:>3d} classes\n"
           f"Valid: {num_valid_classes:>3d} classes\n"
           f"Test:  {num_test_classes:>3d} classes"))
    Print("-" * 18)
    for epoch in range(options.num_epochs):
        model.train()
        epoch_loss_track = AverageMeter()
        # epoch start
        epoch_start = time.time()
        for aug1, aug2, _ in train_loader:
            final_optimizer.zero_grad()
            feat1 = model(aug1.to(device=options.cuda_device))
            feat2 = model(aug2.to(device=options.cuda_device))
            loss = criterion(feat1, feat2)
            loss.backward()
            final_optimizer.step()
            epoch_loss_track.accumulate(loss.item())
        scheduler.step()
        # epoch end
        time_track.accumulate(time.time() - epoch_start)
        loss_val.update(epoch_loss_track.value())
        Print(
            f"({time_track.latest():>7.3f}s) Epoch {epoch+1:0>3}/{options.num_epochs:>3}:",
            end='')
        Print(loss_printer.get_formatted_line(), end='')
        if loss_val.current_is_best:
            best_model_state = model.state_dict()
        if options.local_rank == 0 and epoch % options.eval_freq == options.eval_freq - 1:
            eval_start = time.time()
            model.eval()
            # train_eval.update(kmeans_on_data(model, plain_train_loader, options))
            val_eval.update(kmeans_on_data(model, valid_loader, options))
            test_eval.update(kmeans_on_data(model, test_loader, options))
            model.train()
            eval_time = time.time() - eval_start
            Print(f" ({eval_time:>7.3f}s) ", end='')
            Print(eval_printer.get_formatted_line(), end='')
        Print()
    Print((
        f"Training for {options.num_epochs} epochs took {time_track.total():.3f}s total "
        f"and {time_track.value():.3f}s average"))
    Print(
        "Calculating mean of transformed dataset using the best model state ...",
        end='')
    # since this is what will be saved later
    model.load_state_dict(best_model_state)
    model.eval()
    scaler = StandardScaler(copy=False, with_std=False)
    mean_time = time.time()
    for data, _ in plain_train_loader:
        # feat = model.module.backbone(data.to(device=options.cuda_device)).detach().cpu().numpy()
        feat = model(
            data.to(device=options.cuda_device)).detach().cpu().numpy()
        scaler.partial_fit(feat)
    mean_time = time.time() - mean_time
    Print(f" {mean_time:.3f}s")
    options.train_scaler = scaler
    options.log_file = options.log_file.name
    Print(f"Saving best model and options to {options.save_path}")
    save_dict = {'option': options}
    if options.save_model:
        save_dict['model_state_dict'] = best_model_state
    Save(save_dict, options.save_path)
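# Launch sketch (hypothetical invocation, not from the source): train_loop
# reads options.local_rank and rendezvouses via init_method="env://", which
# matches the classic launcher:
#
#   python -m torch.distributed.launch --nproc_per_node=<num_gpus> train.py ...
#
# Newer PyTorch releases replace this with `torchrun`, which exposes the rank
# through the LOCAL_RANK environment variable instead of a --local_rank flag.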