Example #1
# creating data samplers and loaders
# NOTE: the training loader must not shuffle indices (and must not use a random sampler)
unlabel_loader = torch.utils.data.DataLoader(unlabel_dataset,
                                             batch_size=batch_size,
                                             num_workers=12,
                                             pin_memory=True,
                                             collate_fn=collate_fn_unlabel)

train_loader = torch.utils.data.DataLoader(training_dataset,
                                           batch_size=batch_size,
                                           num_workers=12,
                                           pin_memory=True,
                                           collate_fn=collate_fn)

valid_sampler = SubsetRandomSampler(valid_indices)
valid_loader = torch.utils.data.DataLoader(validation_dataset,
                                           batch_size=batch_size,
                                           sampler=valid_sampler,
                                           num_workers=12,
                                           pin_memory=True,
                                           collate_fn=collate_fn)

if verbose:
    print('Load dataset: {0:.2f} s'.format(time.time() - end))

# clustering algorithm to use
deepcluster = clustering.__dict__['Kmeans'](num_clusters)

# training convnet with DeepCluster
NMI = []
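
For context, the NMI list above is typically used to track how much the cluster assignments change between consecutive epochs. The following is a minimal, self-contained sketch of that bookkeeping, using scikit-learn's KMeans and normalized_mutual_info_score as stand-ins for the clustering.Kmeans wrapper in this snippet; num_clusters, the feature array, and the epoch loop are placeholder assumptions.

# Hypothetical sketch: tracking cluster-assignment stability with NMI across epochs.
# sklearn's KMeans and normalized_mutual_info_score stand in for the
# clustering.Kmeans wrapper used in the snippet above.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score

num_clusters = 100        # assumption: the value passed to clustering.Kmeans above
NMI = []                  # same bookkeeping list as in the snippet
prev_assignments = None

for epoch in range(3):    # placeholder for the real training loop
    # Placeholder for convnet features computed over unlabel_loader.
    features = np.random.randn(1000, 256).astype(np.float32)

    assignments = KMeans(n_clusters=num_clusters, n_init=10).fit_predict(features)

    if prev_assignments is not None:
        # NMI between consecutive epochs measures how stable the clustering is.
        NMI.append(normalized_mutual_info_score(assignments, prev_assignments))
    prev_assignments = assignments
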
Example #2
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing dataset bach')
    transform_train = transforms.Compose([
        transforms.Resize((224,224)),
        #transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
        #transforms.Normalize(mean=(182.82091190656038, 157.3296963620186, 214.87577695210769), std=(38.087092807950555, 44.85998774545851, 22.939518040097095))
    ])

    transform_test = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor()
        #transforms.Normalize(mean=(182.82091190656038, 157.3296963620186, 214.87577695210769), std=(38.087092807950555, 44.85998774545851, 22.939518040097095))
    ])

    dataloader = BachDataset
    num_classes = 4

    trainset = dataloader(root='/global/scratch/chrislu/bach/', train=True,transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=args.workers)

    testset = dataloader(root='/global/scratch/chrislu/bach/', train=False,transform=transform_test)

    num_test = len(testset)
    indices = list(range(num_test))
    split = int(np.floor(args.holdout))

    np.random.seed(10)
    np.random.shuffle(indices)

    holdout_idx, test_idx = indices[split:], indices[:split]
    test_sampler = SubsetRandomSampler(test_idx)
    holdout_sampler = SubsetRandomSampler(holdout_idx)


    testloader = data.DataLoader(testset, sampler=test_sampler, batch_size=args.test_batch, shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
                    cardinality=args.cardinality,
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    growthRate=args.growthRate,
                    compressionRate=args.compressionRate,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                )
    elif args.arch.endswith('resnet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                )
    else:
        if "alexscat_fnum" in args.arch:
            model = models.__dict__[args.arch](num_classes=num_classes, j = args.ascat_j, l = args.ascat_l, extra_conv = args.extra_conv)
        else:
            model = models.__dict__[args.arch](num_classes = num_classes)

    if args.copy_num != 0 and 'alexnet_n2_copy2' in args.arch:
        best_alexnet = torch.load(args.copy_path+"/model_best.pth.tar")['state_dict']
        sd = model.state_dict()

        sd['features.0.weight'] = best_alexnet['module.features.0.weight']
        sd['features.0.bias'] = best_alexnet['module.features.0.bias']

        sd['features.3.weight'] = best_alexnet['module.features.3.weight']
        sd['features.3.bias'] = best_alexnet['module.features.3.bias']

        model.load_state_dict(sd)
    elif args.copy_num != 0 and 'alexnet' in args.arch:
        from importance_helpers import get_alexnet_important_filts
        filts = get_alexnet_important_filts(args.copy_path, args.copy_num)
        sd = model.state_dict()
        if args.copy_num != -1:
            sd['features.0.weight'][:args.copy_num] = filts
        else:
            sd['features.0.weight'][:] = filts
        model.load_state_dict(sd)

    elif args.copy_num != 0 and 'res' in args.arch:
        best_alexnet = torch.load(args.copy_path+"/model_best.pth.tar")['state_dict']
        sd = model.state_dict()
        sd['classifier.weight'] = best_alexnet['module.classifier.weight']
        sd['classifier.bias'] = best_alexnet['module.classifier.bias']
        model.load_state_dict(sd)

    elif args.copy_num != 0:
        from importance_helpers import get_important_filts
        filts = get_important_filts(args.copy_path, args.ascat_l, args.copy_num)
        sd = model.state_dict()
        if args.copy_num != -1:
            sd['first_layer.0.weight'][:args.copy_num] = filts
        else:
            sd['first_layer.0.weight'][:] = filts
        model.load_state_dict(sd)

        # TODO: consider adding batch norm after the first layer

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    # Resume
    title = 'bach-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint found at the given path!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])


    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
            }, is_best, checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
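
The copy_num branches above seed a freshly initialized model's first layers with filters taken from a previously trained checkpoint. Below is a minimal, self-contained sketch of that state_dict copy pattern; the two torchvision AlexNet instances are stand-ins for the custom architectures and the model_best.pth.tar checkpoint used in this example.

# Hypothetical sketch of seeding a model's first conv layer from another model.
# torchvision's AlexNet stands in for the custom models/checkpoint above.
import torch
from torchvision.models import alexnet

donor = alexnet(num_classes=4)    # stands in for the loaded best checkpoint
target = alexnet(num_classes=4)   # freshly initialized model to be seeded

donor_sd = donor.state_dict()
sd = target.state_dict()

# Copy the first conv layer's weights and biases (key 'features.0' in AlexNet).
# NOTE: checkpoints saved from a DataParallel-wrapped model prefix every key
# with 'module.', which is why the original code indexes 'module.features.0.*'.
sd['features.0.weight'] = donor_sd['features.0.weight'].clone()
sd['features.0.bias'] = donor_sd['features.0.bias'].clone()

target.load_state_dict(sd)
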
Example #3
def create_split_loaders(batch_size,
                         seed,
                         transform=transforms.ToTensor(),
                         p_val=0.1,
                         p_test=0.2,
                         shuffle=True,
                         show_sample=False,
                         extras={}):
    """ Creates the DataLoader objects for the training, validation, and test sets. 

    Params:
    -------
    - batch_size: (int) mini-batch size to load at a time
    - seed: (int) Seed for random generator (use for testing/reproducibility)
    - transform: A torchvision.transforms object - transformations to apply to each image
                 (Can be "transforms.Compose([transforms])")
    - p_val: (float) Percent (as decimal) of dataset to use for validation
    - p_test: (float) Percent (as decimal) of the dataset to split for testing
    - shuffle: (bool) Indicate whether to shuffle the dataset before splitting
    - show_sample: (bool) Plot a mini-example as a grid of the dataset
    - extras: (dict) 
        If CUDA/GPU computing is supported, contains:
        - num_workers: (int) Number of subprocesses to use while loading the dataset
        - pin_memory: (bool) For use with CUDA - copy tensors into pinned memory 
                  (set to True if using a GPU)
        Otherwise, extras is an empty dict.

    Returns:
    --------
    - train_loader: (DataLoader) The iterator for the training set
    - val_loader: (DataLoader) The iterator for the validation set
    - test_loader: (DataLoader) The iterator for the test set
    """

    # Create a ChestXrayDataset object
    dataset = ChestXrayDataset(transform)

    # Dimensions and indices of training set
    dataset_size = len(dataset)
    all_indices = list(range(dataset_size))

    # Shuffle dataset before dividing into training & test sets
    if shuffle:
        np.random.seed(seed)
        np.random.shuffle(all_indices)

    # Create the validation split from the full dataset
    val_split = int(np.floor(p_val * dataset_size))
    train_ind, val_ind = all_indices[val_split:], all_indices[:val_split]

    # Separate a test split from the training dataset
    test_split = int(np.floor(p_test * len(train_ind)))
    train_ind, test_ind = train_ind[test_split:], train_ind[:test_split]

    # Use the SubsetRandomSampler as the iterator for each subset
    sample_train = SubsetRandomSampler(train_ind)
    sample_test = SubsetRandomSampler(test_ind)
    sample_val = SubsetRandomSampler(val_ind)

    num_workers = 0
    pin_memory = False
    # If CUDA is available
    if extras:
        num_workers = extras["num_workers"]
        pin_memory = extras["pin_memory"]

    # Define the training, test, & validation DataLoaders
    train_loader = DataLoader(dataset,
                              batch_size=batch_size,
                              sampler=sample_train,
                              num_workers=num_workers,
                              pin_memory=pin_memory)

    test_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             sampler=sample_test,
                             num_workers=num_workers,
                             pin_memory=pin_memory)

    val_loader = DataLoader(dataset,
                            batch_size=batch_size,
                            sampler=sample_val,
                            num_workers=num_workers,
                            pin_memory=pin_memory)

    # Return the training, validation, test DataLoader objects
    return (train_loader, val_loader, test_loader)
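
A possible usage sketch for create_split_loaders follows; the batch size, seed, and extras values are illustrative, not taken from the original project.

# Hypothetical usage of create_split_loaders (values are illustrative).
import torch
from torchvision import transforms

extras = {}
if torch.cuda.is_available():
    extras = {"num_workers": 4, "pin_memory": True}

train_loader, val_loader, test_loader = create_split_loaders(
    batch_size=16,
    seed=42,
    transform=transforms.ToTensor(),
    p_val=0.1,
    p_test=0.2,
    shuffle=True,
    extras=extras)
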
Example #4
File: bco.py  Project: ASzot/rl-toolkit
    def _update_all(self, states, actions, dones):
        """
        - states (list[N+1])
        - dones (list[N+1])
        - actions (list[N])
        Performs a complete update of the model by following these steps:
            1. Train inverse function with ground truth data provided.
            2. Infer actions in expert dataset
            3. Train BC
        """
        dataset = [{
            's0': states[i],
            's1': states[i+1],
            'action': actions[i]
        } for i in range(len(actions))
            if not dones[i+1]]

        rutils.pstart_sep()
        print(f"BCO Update {self.update_i}/{self.args.bco_alpha}")
        print('---')

        print('Training inverse function')
        dataset_idxs = list(range(len(dataset)))
        np.random.shuffle(dataset_idxs)

        eval_len = int(len(dataset_idxs) * self.args.bco_inv_eval_holdout)
        if eval_len != 0.0:
            train_trans_sampler = BatchSampler(SubsetRandomSampler(
                dataset_idxs[:-eval_len]), self.args.bco_inv_batch_size, drop_last=False)
            val_trans_sampler = BatchSampler(SubsetRandomSampler(
                dataset_idxs[-eval_len:]), self.args.bco_inv_batch_size, drop_last=False)
        else:
            train_trans_sampler = BatchSampler(SubsetRandomSampler(
                dataset_idxs), self.args.bco_inv_batch_size, drop_last=False)

        if self.args.bco_inv_load is None or self.update_i > 0:
            infer_ac_losses = self._train_inv_func(train_trans_sampler, dataset)
            rutils.plot_line(infer_ac_losses, f"ac_inv_loss_{self.update_i}.png",
                             self.args, not self.args.no_wb,
                             self.get_completed_update_steps(self.update_i))
            if self.update_i == 0:
                # Only save the inverse model on the first epoch for debugging
                # purposes
                rutils.save_model(self.inv_func, f"inv_func_{self.update_i}.pt",
                        self.args)

        if eval_len != 0.0:
            if not isinstance(self.policy.action_space, spaces.Discrete):
                raise ValueError('Evaluating the holdout accuracy is only'
                                 ' supported for discrete action spaces right now')
            accuracy = self._infer_inv_accuracy(val_trans_sampler, dataset)
            print('Inferred actions with %.2f accuracy' % accuracy)

        if isinstance(self.expert_dataset, torch.utils.data.Subset):
            s0 = self.expert_dataset.dataset.trajs['obs'].to(self.args.device).float()
            s1 = self.expert_dataset.dataset.trajs['next_obs'].to(self.args.device).float()
            dataset_device = self.expert_dataset.dataset.trajs['obs'].device
        else:
            s0 = self.expert_dataset.trajs['obs'].to(self.args.device).float()
            s1 = self.expert_dataset.trajs['next_obs'].to(self.args.device).float()
            dataset_device = self.expert_dataset.trajs['obs'].device

        # Perform inference on the expert states
        with torch.no_grad():
            pred_actions = self.inv_func(s0, s1).to(dataset_device)
            pred_actions = rutils.get_ac_compact(self.policy.action_space,
                                                 pred_actions)
            if not self.args.bco_oracle_actions:
                if isinstance(self.expert_dataset, torch.utils.data.Subset):
                    self.expert_dataset.dataset.trajs['actions'] = pred_actions
                else:
                    self.expert_dataset.trajs['actions'] = pred_actions
        # Recreate the dataset for BC training so we can be sure it has the
        # most recent data.
        self._create_train_loader(self.args)

        print('Training Policy')
        self.full_train(self.update_i)
        self.update_i += 1
        rutils.pend_sep()
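
The sampler setup above is a common pattern for carving a validation holdout out of an index list without copying the underlying data. Below is a standalone sketch of that pattern; the dataset size, holdout fraction, and batch size are illustrative.

# Standalone sketch of the train/holdout sampler split used above.
import numpy as np
from torch.utils.data import BatchSampler, SubsetRandomSampler

dataset_len = 1000
holdout_frac = 0.1
batch_size = 32

idxs = list(range(dataset_len))
np.random.shuffle(idxs)

eval_len = int(dataset_len * holdout_frac)
train_sampler = BatchSampler(SubsetRandomSampler(idxs[:-eval_len]),
                             batch_size, drop_last=False)
val_sampler = BatchSampler(SubsetRandomSampler(idxs[-eval_len:]),
                           batch_size, drop_last=False)

for batch_idxs in train_sampler:
    # batch_idxs is a list of indices into the transition dataset;
    # the original code looks these up in `dataset` to train the inverse model.
    pass
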
Example #5
def main():
    # Training settings
    # Use the command line to modify the default settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=14, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--step', type=int, default=1, metavar='N',
                        help='number of epochs between learning rate reductions (default: 1)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')

    parser.add_argument('--evaluate', action='store_true', default=False,
                        help='evaluate your model on the official test set')
    parser.add_argument('--load-model', type=str,
                        help='model file path')

    parser.add_argument('--save-model', action='store_true', default=True,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # Evaluate on the official test set
    if args.evaluate:
        assert os.path.exists(args.load_model)

        # Set the test model
        model = Net().to(device)
        model.load_state_dict(torch.load(args.load_model))

        test_dataset = datasets.MNIST('../data', train=False,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))

        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=args.test_batch_size, shuffle=True, **kwargs)

        test_error, test_acc = test(model, device, test_loader)
        
        with open('test_errors.txt', 'a') as f:
            f.write(str(test_error) + '\n')

        return

    # PyTorch's built-in MNIST dataset loads and transforms examples lazily at each iteration
    train_dataset = datasets.MNIST('../data', train=True, download=True,
                transform=transforms.Compose([           # Data preprocessing and augmentation
                    transforms.RandomHorizontalFlip(),    # Horizontally flip image with probability 0.5
                    transforms.RandomRotation(45),        # Rotate image by a random angle in [-45, 45] degrees
                    transforms.ToTensor(),                # Convert PIL image to tensor
                    transforms.Normalize((0.1307,), (0.3081,))  # Normalize with MNIST mean/std
                ]))

    # Build disjoint training and validation index sets and feed them to
    # SubsetRandomSampler. The split below is stratified: for each class,
    # 15% of the examples are held out for validation and the rest are used
    # for training (optionally only a fraction of them, via train_frac).

    train_frac = 1.0

    subset_indices_train = []
    subset_indices_valid = []

    # Setting seed so split doesn't change from run to run 
    np.random.seed(2021)
    val_frac = 0.15

    # Get unique labels/classes
    labels = train_dataset.targets.numpy()
    unq_labels = np.unique(labels)

    for unq_label in unq_labels:
        class_indices = np.argwhere(labels == unq_label)
        np.random.shuffle(class_indices)
        class_indices = class_indices.flatten()

        # Randomly sample 15% of the training examples for each class to form
        # a validation set
        split_idx = int(val_frac * class_indices.size)

        # Only training on a fraction of the training data
        train_idx = int(split_idx + (1 - train_frac) * (len(class_indices) - split_idx + 1))
        subset_indices_train.extend(class_indices[train_idx:])
        subset_indices_valid.extend(class_indices[:split_idx])

    # Shuffle indices
    np.random.shuffle(subset_indices_train)
    np.random.shuffle(subset_indices_valid)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        sampler=SubsetRandomSampler(subset_indices_train)
    )
    val_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.test_batch_size,
        sampler=SubsetRandomSampler(subset_indices_valid)
    )

    # Load your model [fcNet, ConvNet, Net]
    model = Net().to(device)

    # Try different optimizers here [Adam, SGD, RMSprop]
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    # Set your learning rate scheduler
    scheduler = StepLR(optimizer, step_size=args.step, gamma=args.gamma)

    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []

    # Training loop
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        train_loss, train_acc = test(model, device, train_loader) 
        val_loss, val_acc = test(model, device, val_loader)
        scheduler.step()    # learning rate scheduler

        # Save training and testing loss so we can plot them and check for
        # overfitting
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_losses.append(val_loss)
        val_accs.append(val_acc)

        # If at final epoch, save losses for part 7c
        if epoch == args.epochs:
            with open('train_errors.txt', 'a') as f:
                f.write(str(train_loss) + '\n')

            with open('train_num_examples.txt', 'a') as f:
                f.write(str(len(subset_indices_train)) + '\n')

        # You may optionally save your model at each epoch here

    if args.save_model:
        torch.save(model.state_dict(), "mnist_model.pt")

    # Create a plot of validation and training losses
    plt.plot(range(1, args.epochs + 1), train_losses, label='Train Loss')
    plt.plot(range(1, args.epochs + 1), val_losses, label='Validation Loss')
    plt.title('Training and Validation Loss over Epochs')
    plt.legend()
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.show()
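
The per-class split inside main() can also be expressed as a small standalone helper. The sketch below reproduces that stratified logic; the function name and the demo labels are illustrative and not part of the original script.

# Hypothetical helper reproducing the stratified split used in main() above.
import numpy as np

def stratified_split(labels, val_frac=0.15, seed=2021):
    """Return disjoint (train_indices, val_indices) with val_frac of each
    class held out for validation."""
    rng = np.random.RandomState(seed)
    train_idx, val_idx = [], []
    for label in np.unique(labels):
        class_indices = np.argwhere(labels == label).flatten()
        rng.shuffle(class_indices)
        split = int(val_frac * class_indices.size)
        val_idx.extend(class_indices[:split])
        train_idx.extend(class_indices[split:])
    train_idx = np.array(train_idx)
    val_idx = np.array(val_idx)
    rng.shuffle(train_idx)
    rng.shuffle(val_idx)
    return train_idx, val_idx

# Example with dummy labels; with MNIST this would be train_dataset.targets.numpy().
train_indices, val_indices = stratified_split(np.repeat(np.arange(10), 100))
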