# creating data samplers and loaders
# NOTE: training loader cannot shuffle index !! (also no random sampler)
unlabel_loader = torch.utils.data.DataLoader(unlabel_dataset,
                                             batch_size=batch_size,
                                             num_workers=12,
                                             pin_memory=True,
                                             collate_fn=collate_fn_unlabel)
train_loader = torch.utils.data.DataLoader(training_dataset,
                                           batch_size=batch_size,
                                           num_workers=12,
                                           pin_memory=True,
                                           collate_fn=collate_fn)
valid_sampler = SubsetRandomSampler(valid_indices)
valid_loader = torch.utils.data.DataLoader(validation_dataset,
                                           batch_size=batch_size,
                                           sampler=valid_sampler,
                                           num_workers=12,
                                           pin_memory=True,
                                           collate_fn=collate_fn)

if verbose:
    print('Load dataset: {0:.2f} s'.format(time.time() - end))

# clustering algorithm to use
deepcluster = clustering.__dict__['Kmeans'](num_clusters)

# training convnet with DeepCluster
NMI = []
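# A minimal standalone sketch (hypothetical names, with sklearn's KMeans standing
# in for the clustering module above) of why the unshuffled loaders matter:
# cluster assignments are typically mapped back to samples by position, so a
# shuffling loader would misalign the pseudo-labels with their images.
import torch
from sklearn.cluster import KMeans

def assign_pseudo_labels(model, loader, num_clusters):
    feats = []
    with torch.no_grad():
        for images, _ in loader:            # loader iterates in dataset order
            feats.append(model(images).cpu())
    feats = torch.cat(feats).numpy()
    # Row i of feats corresponds to dataset index i only because the loader did
    # not shuffle, so the returned label array can be indexed the same way.
    return KMeans(n_clusters=num_clusters).fit_predict(feats)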
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing dataset bach')
    transform_train = transforms.Compose([
        transforms.Resize((224, 224)),
        #transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
        #transforms.Normalize(mean=(182.82091190656038, 157.3296963620186, 214.87577695210769), std=(38.087092807950555, 44.85998774545851, 22.939518040097095))
    ])
    transform_test = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
        #transforms.Normalize(mean=(182.82091190656038, 157.3296963620186, 214.87577695210769), std=(38.087092807950555, 44.85998774545851, 22.939518040097095))
    ])
    dataloader = BachDataset
    num_classes = 4

    trainset = dataloader(root='/global/scratch/chrislu/bach/', train=True, transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=args.workers)

    testset = dataloader(root='/global/scratch/chrislu/bach/', train=False, transform=transform_test)
    num_test = len(testset)
    indices = list(range(num_test))
    split = int(np.floor(args.holdout))
    np.random.seed(10)
    np.random.shuffle(indices)
    holdout_idx, test_idx = indices[split:], indices[:split]
    test_sampler = SubsetRandomSampler(test_idx)
    holdout_sampler = SubsetRandomSampler(holdout_idx)
    testloader = data.DataLoader(testset, sampler=test_sampler, batch_size=args.test_batch, shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
            cardinality=args.cardinality,
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            growthRate=args.growthRate,
            compressionRate=args.compressionRate,
            dropRate=args.drop,
        )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.endswith('resnet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
        )
    else:
        if "alexscat_fnum" in args.arch:
            model = models.__dict__[args.arch](num_classes=num_classes, j=args.ascat_j,
                                               l=args.ascat_l, extra_conv=args.extra_conv)
        else:
            model = models.__dict__[args.arch](num_classes=num_classes)

    if args.copy_num != 0 and 'alexnet_n2_copy2' in args.arch:
        best_alexnet = torch.load(args.copy_path + "/model_best.pth.tar")['state_dict']
        sd = model.state_dict()
        sd['features.0.weight'] = best_alexnet['module.features.0.weight']
        sd['features.0.bias'] = best_alexnet['module.features.0.bias']
        sd['features.3.weight'] = best_alexnet['module.features.3.weight']
        sd['features.3.bias'] = best_alexnet['module.features.3.bias']
        model.load_state_dict(sd)
    elif args.copy_num != 0 and 'alexnet' in args.arch:
        from importance_helpers import get_alexnet_important_filts
        filts = get_alexnet_important_filts(args.copy_path, args.copy_num)
        sd = model.state_dict()
        if args.copy_num != -1:
            sd['features.0.weight'][:args.copy_num] = filts
        else:
            sd['features.0.weight'][:] = filts
        model.load_state_dict(sd)
    elif args.copy_num != 0 and 'res' in args.arch:
        best_alexnet = torch.load(args.copy_path + "/model_best.pth.tar")['state_dict']
        sd = model.state_dict()
        sd['classifier.weight'] = best_alexnet['module.classifier.weight']
        sd['classifier.bias'] = best_alexnet['module.classifier.bias']
        model.load_state_dict(sd)  # apply the copied classifier weights
    elif args.copy_num != 0:
        from importance_helpers import get_important_filts
        filts = get_important_filts(args.copy_path, args.ascat_l, args.copy_num)
        sd = model.state_dict()
        if args.copy_num != -1:
            sd['first_layer.0.weight'][:args.copy_num] = filts
        else:
            sd['first_layer.0.weight'][:] = filts
        model.load_state_dict(sd)

    # consider batch norm after first layer
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print(' Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # Resume
    title = 'bach-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
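# The "consider batch norm after first layer" note above could be prototyped
# without editing the model classes: wrap the first conv in a Sequential so a
# BatchNorm2d runs right after it. A rough sketch using torchvision's AlexNet
# as a stand-in for the project's models (the attribute layout may differ for
# other architectures):
import torch.nn as nn
from torchvision.models import alexnet

net = alexnet(num_classes=4)
first_conv = net.features[0]                 # Conv2d(3, 64, kernel_size=11, ...)
net.features[0] = nn.Sequential(
    first_conv,
    nn.BatchNorm2d(first_conv.out_channels)  # normalize the first-layer output
)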
def create_split_loaders(batch_size, seed, transform=transforms.ToTensor(),
                         p_val=0.1, p_test=0.2, shuffle=True,
                         show_sample=False, extras={}):
    """ Creates the DataLoader objects for the training, validation, and test sets.

    Params:
    -------
    - batch_size: (int) mini-batch size to load at a time
    - seed: (int) Seed for random generator (use for testing/reproducibility)
    - transform: A torchvision.transforms object - transformations to apply to each image
                 (Can be "transforms.Compose([transforms])")
    - p_val: (float) Percent (as decimal) of dataset to use for validation
    - p_test: (float) Percent (as decimal) of the dataset to split for testing
    - shuffle: (bool) Indicate whether to shuffle the dataset before splitting
    - show_sample: (bool) Plot a mini-example as a grid of the dataset
    - extras: (dict)
        If CUDA/GPU computing is supported, contains:
        - num_workers: (int) Number of subprocesses to use while loading the dataset
        - pin_memory: (bool) For use with CUDA - copy tensors into pinned memory
                      (set to True if using a GPU)
        Otherwise, extras is an empty dict.

    Returns:
    --------
    - train_loader: (DataLoader) The iterator for the training set
    - val_loader: (DataLoader) The iterator for the validation set
    - test_loader: (DataLoader) The iterator for the test set
    """
    # Create a ChestXrayDataset object
    dataset = ChestXrayDataset(transform)

    # Dimensions and indices of training set
    dataset_size = len(dataset)
    all_indices = list(range(dataset_size))

    # Shuffle dataset before dividing into training & test sets
    if shuffle:
        np.random.seed(seed)
        np.random.shuffle(all_indices)

    # Create the validation split from the full dataset
    val_split = int(np.floor(p_val * dataset_size))
    train_ind, val_ind = all_indices[val_split:], all_indices[:val_split]

    # Separate a test split from the training dataset
    test_split = int(np.floor(p_test * len(train_ind)))
    train_ind, test_ind = train_ind[test_split:], train_ind[:test_split]

    # Use the SubsetRandomSampler as the iterator for each subset
    sample_train = SubsetRandomSampler(train_ind)
    sample_test = SubsetRandomSampler(test_ind)
    sample_val = SubsetRandomSampler(val_ind)

    num_workers = 0
    pin_memory = False
    # If CUDA is available
    if extras:
        num_workers = extras["num_workers"]
        pin_memory = extras["pin_memory"]

    # Define the training, test, & validation DataLoaders
    train_loader = DataLoader(dataset, batch_size=batch_size,
                              sampler=sample_train, num_workers=num_workers,
                              pin_memory=pin_memory)
    test_loader = DataLoader(dataset, batch_size=batch_size,
                             sampler=sample_test, num_workers=num_workers,
                             pin_memory=pin_memory)
    val_loader = DataLoader(dataset, batch_size=batch_size,
                            sampler=sample_val, num_workers=num_workers,
                            pin_memory=pin_memory)

    # Return the training, validation, test DataLoader objects
    return (train_loader, val_loader, test_loader)
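# A hedged usage sketch for create_split_loaders; the transform and extras
# values are illustrative, and ChestXrayDataset must be importable for it to run.
import torch
from torchvision import transforms

transform = transforms.Compose([transforms.Resize((224, 224)),
                                transforms.ToTensor()])
extras = ({"num_workers": 4, "pin_memory": True}
          if torch.cuda.is_available() else {})
train_loader, val_loader, test_loader = create_split_loaders(
    batch_size=16, seed=42, transform=transform,
    p_val=0.1, p_test=0.2, shuffle=True, extras=extras)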
def _update_all(self, states, actions, dones):
    """
    - states (list[N+1])
    - dones (list[N+1])
    - actions (list[N])
    Performs a complete update of the model by following these steps:
    1. Train inverse function with ground truth data provided.
    2. Infer actions in expert dataset
    3. Train BC
    """
    dataset = [{
        's0': states[i],
        's1': states[i + 1],
        'action': actions[i]
    } for i in range(len(actions)) if not dones[i + 1]]

    rutils.pstart_sep()
    print(f"BCO Update {self.update_i}/{self.args.bco_alpha}")
    print('---')
    print('Training inverse function')

    dataset_idxs = list(range(len(dataset)))
    np.random.shuffle(dataset_idxs)

    eval_len = int(len(dataset_idxs) * self.args.bco_inv_eval_holdout)
    if eval_len != 0:
        train_trans_sampler = BatchSampler(
            SubsetRandomSampler(dataset_idxs[:-eval_len]),
            self.args.bco_inv_batch_size, drop_last=False)
        val_trans_sampler = BatchSampler(
            SubsetRandomSampler(dataset_idxs[-eval_len:]),
            self.args.bco_inv_batch_size, drop_last=False)
    else:
        train_trans_sampler = BatchSampler(
            SubsetRandomSampler(dataset_idxs),
            self.args.bco_inv_batch_size, drop_last=False)

    if self.args.bco_inv_load is None or self.update_i > 0:
        infer_ac_losses = self._train_inv_func(train_trans_sampler, dataset)
        rutils.plot_line(infer_ac_losses, f"ac_inv_loss_{self.update_i}.png",
                         self.args, not self.args.no_wb,
                         self.get_completed_update_steps(self.update_i))

    if self.update_i == 0:
        # Only save the inverse model on the first epoch for debugging
        # purposes
        rutils.save_model(self.inv_func, f"inv_func_{self.update_i}.pt",
                          self.args)

    if eval_len != 0:
        if not isinstance(self.policy.action_space, spaces.Discrete):
            raise ValueError('Evaluating the holdout accuracy is only'
                             ' supported for discrete action spaces right now')
        accuracy = self._infer_inv_accuracy(val_trans_sampler, dataset)
        print('Inferred actions with %.2f accuracy' % accuracy)

    if isinstance(self.expert_dataset, torch.utils.data.Subset):
        s0 = self.expert_dataset.dataset.trajs['obs'].to(self.args.device).float()
        s1 = self.expert_dataset.dataset.trajs['next_obs'].to(self.args.device).float()
        dataset_device = self.expert_dataset.dataset.trajs['obs'].device
    else:
        s0 = self.expert_dataset.trajs['obs'].to(self.args.device).float()
        s1 = self.expert_dataset.trajs['next_obs'].to(self.args.device).float()
        dataset_device = self.expert_dataset.trajs['obs'].device

    # Perform inference on the expert states
    with torch.no_grad():
        pred_actions = self.inv_func(s0, s1).to(dataset_device)
        pred_actions = rutils.get_ac_compact(self.policy.action_space, pred_actions)

    if not self.args.bco_oracle_actions:
        if isinstance(self.expert_dataset, torch.utils.data.Subset):
            self.expert_dataset.dataset.trajs['actions'] = pred_actions
        else:
            self.expert_dataset.trajs['actions'] = pred_actions

    # Recreate the dataset for BC training so we can be sure it has the
    # most recent data.
    self._create_train_loader(self.args)

    print('Training Policy')
    self.full_train(self.update_i)
    self.update_i += 1
    rutils.pend_sep()
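# A small self-contained sketch of the sampler pattern above (illustrative
# names only): wrapping a SubsetRandomSampler in a BatchSampler yields shuffled
# lists of indices, which are then used to pull batches out of a plain Python
# list of transitions like `dataset`.
import numpy as np
from torch.utils.data import BatchSampler, SubsetRandomSampler

transitions = [{'s0': i, 's1': i + 1, 'action': 0} for i in range(10)]
idxs = list(range(len(transitions)))
np.random.shuffle(idxs)

holdout = 2   # analogous to eval_len above
train_sampler = BatchSampler(SubsetRandomSampler(idxs[:-holdout]),
                             batch_size=4, drop_last=False)
for batch_idxs in train_sampler:
    batch = [transitions[i] for i in batch_idxs]   # what a training step would consume
    print(batch_idxs, len(batch))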
def main():
    # Training settings
    # Use the command line to modify the default settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=14, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--step', type=int, default=1, metavar='N',
                        help='number of epochs between learning rate reductions (default: 1)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--evaluate', action='store_true', default=False,
                        help='evaluate your model on the official test set')
    parser.add_argument('--load-model', type=str,
                        help='model file path')
    parser.add_argument('--save-model', action='store_true', default=True,
                        help='For Saving the current Model')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # Evaluate on the official test set
    if args.evaluate:
        assert os.path.exists(args.load_model)

        # Set the test model
        model = Net().to(device)
        model.load_state_dict(torch.load(args.load_model))

        test_dataset = datasets.MNIST('../data', train=False,
                                      transform=transforms.Compose([
                                          transforms.ToTensor(),
                                          transforms.Normalize((0.1307,), (0.3081,))
                                      ]))

        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=args.test_batch_size, shuffle=True, **kwargs)

        test_error, test_acc = test(model, device, test_loader)
        with open('test_errors.txt', 'a') as f:
            f.write(str(test_error) + '\n')

        return

    # Pytorch has default MNIST dataloader which loads data at each iteration
    train_dataset = datasets.MNIST('../data', train=True, download=True,
                                   transform=transforms.Compose([
                                       # Data preprocessing
                                       transforms.RandomHorizontalFlip(),  # Horizontally flip image with probability 0.5
                                       transforms.RandomRotation(45),      # Rotate image by 45 degrees
                                       transforms.ToTensor(),              # Add data augmentation here
                                       transforms.Normalize((0.1307,), (0.3081,))
                                   ]))

    # You can assign indices for training/validation or use a random subset for
    # training by using SubsetRandomSampler. Right now the train and validation
    # sets are built from the same indices - this is bad! Change it so that
    # the training and validation sets are disjoint and have the correct relative sizes.
    train_frac = 1.0
    subset_indices_train = []
    subset_indices_valid = []

    # Setting seed so split doesn't change from run to run
    np.random.seed(2021)

    val_frac = 0.15

    # Get unique labels/classes
    labels = train_dataset.targets.numpy()
    unq_labels = np.unique(labels)

    for unq_label in unq_labels:
        class_indices = np.argwhere(labels == unq_label)
        np.random.shuffle(class_indices)
        class_indices = class_indices.flatten()

        # Randomly sample 15% of the training examples for each class to form
        # a validation set
        split_idx = int(val_frac * class_indices.size)

        # Only training on a fraction of the training data
        train_idx = int(split_idx + (1 - train_frac) * (len(class_indices) - split_idx + 1))

        subset_indices_train.extend(class_indices[train_idx:])
        subset_indices_valid.extend(class_indices[:split_idx])

    # Shuffle indices
    np.random.shuffle(subset_indices_train)
    np.random.shuffle(subset_indices_valid)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        sampler=SubsetRandomSampler(subset_indices_train)
    )
    val_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.test_batch_size,
        sampler=SubsetRandomSampler(subset_indices_valid)
    )

    # Load your model [fcNet, ConvNet, Net]
    model = Net().to(device)

    # Try different optimizers here [Adam, SGD, RMSprop]
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    # Set your learning rate scheduler
    scheduler = StepLR(optimizer, step_size=args.step, gamma=args.gamma)

    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []

    # Training loop
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        train_loss, train_acc = test(model, device, train_loader)
        val_loss, val_acc = test(model, device, val_loader)
        scheduler.step()    # learning rate scheduler

        # Save training and testing loss so we can plot them and check for
        # overfitting
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_losses.append(val_loss)
        val_accs.append(val_acc)

        # If at final epoch, save losses for part 7c
        if epoch == args.epochs:
            with open('train_errors.txt', 'a') as f:
                f.write(str(train_loss) + '\n')
            with open('train_num_examples.txt', 'a') as f:
                f.write(str(len(subset_indices_train)) + '\n')

        # You may optionally save your model at each epoch here
        if args.save_model:
            torch.save(model.state_dict(), "mnist_model.pt")

    # Create a plot of val and training losses
    plt.plot(range(1, args.epochs + 1), train_losses, label='Train Loss')
    plt.plot(range(1, args.epochs + 1), val_losses, label='Validation Loss')
    plt.title('Training and Validation Loss over Epochs')
    plt.legend()
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.show()
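# A small sanity check, not part of the original script, that could be dropped
# in right after the split above is built (the names refer to variables inside
# main()): it verifies the train/validation index sets are disjoint and that
# the validation set is roughly val_frac of the data.
def check_split(train_idx, val_idx, n_total, val_frac):
    assert set(train_idx).isdisjoint(val_idx), "train/val indices overlap"
    assert abs(len(val_idx) / n_total - val_frac) < 0.01, "unexpected validation fraction"

# e.g. check_split(subset_indices_train, subset_indices_valid, len(labels), val_frac)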