Example #1
def ce_gradient_norm_histogram(model,
                               data_loader,
                               tensorboard,
                               epoch,
                               name,
                               max_num_examples=5000):
    model.eval()

    pred = utils.apply_on_dataset(model=model,
                                  dataset=data_loader.dataset,
                                  output_keys_regexp='pred',
                                  description='grad-histogram:pred',
                                  max_num_examples=max_num_examples)['pred']
    n_examples = min(len(data_loader.dataset), max_num_examples)
    labels = []
    for idx in range(n_examples):
        labels.append(data_loader.dataset[idx][1])
    labels = torch.tensor(labels, dtype=torch.long)
    labels = F.one_hot(labels, num_classes=model.num_classes).float()
    labels = utils.to_cpu(labels)

    grad_wrt_logits = torch.softmax(pred, dim=-1) - labels
    grad_norms = torch.sum(grad_wrt_logits**2, dim=-1)
    grad_norms = utils.to_numpy(grad_norms)

    try:
        tensorboard.add_histogram(tag=name,
                                  values=grad_norms,
                                  global_step=epoch)
    except ValueError as e:
        print("Tensorboard histogram error: {}".format(e))
Example #2
def ce_gradient_pair_scatter(model,
                             data_loader,
                             d1=0,
                             d2=1,
                             max_num_examples=2000,
                             plt=None):
    if plt is None:
        plt = matplotlib.pyplot
    model.eval()

    pred = utils.apply_on_dataset(model=model,
                                  dataset=data_loader.dataset,
                                  output_keys_regexp='pred',
                                  max_num_examples=max_num_examples,
                                  description='grad-pair-scatter:pred')['pred']
    n_examples = min(len(data_loader.dataset), max_num_examples)
    labels = []
    for idx in range(n_examples):
        labels.append(data_loader.dataset[idx][1])
    labels = torch.tensor(labels, dtype=torch.long)
    labels = F.one_hot(labels, num_classes=model.num_classes).float()
    labels = utils.to_cpu(labels)
    grad_wrt_logits = torch.softmax(pred, dim=-1) - labels
    grad_wrt_logits = utils.to_numpy(grad_wrt_logits)

    fig, ax = plt.subplots(1, figsize=(5, 5))
    plt.scatter(grad_wrt_logits[:, d1], grad_wrt_logits[:, d2])
    ax.set_xlabel(str(d1))
    ax.set_ylabel(str(d2))
    # L = np.percentile(grad_wrt_logits, q=5, axis=0)
    # R = np.percentile(grad_wrt_logits, q=95, axis=0)
    # ax.set_xlim(L[d1], R[d1])
    # ax.set_ylim(L[d2], R[d2])
    ax.set_title('Two coordinates of grad wrt logits')
    return fig, plt
Example #3
def pred_gradient_pair_scatter(model,
                               data_loader,
                               d1=0,
                               d2=1,
                               max_num_examples=2000,
                               plt=None):
    if plt is None:
        plt = matplotlib.pyplot
    model.eval()
    grad_pred = utils.apply_on_dataset(
        model=model,
        dataset=data_loader.dataset,
        output_keys_regexp='grad_pred',
        max_num_examples=max_num_examples,
        description='grad-pair-scatter:grad_pred')['grad_pred']
    grad_pred = utils.to_numpy(grad_pred)
    fig, ax = plt.subplots(1, figsize=(5, 5))
    plt.scatter(grad_pred[:, d1], grad_pred[:, d2])
    ax.set_xlabel(str(d1))
    ax.set_ylabel(str(d2))
    # L = np.percentile(grad_pred, q=5, axis=0)
    # R = np.percentile(grad_pred, q=95, axis=0)
    # ax.set_xlim(L[d1], R[d1])
    # ax.set_ylim(L[d2], R[d2])
    ax.set_title('Two coordinates of grad wrt logits')
    return fig, plt
Example #4
    def __init__(self,
                 model,
                 train_data,
                 val_data=None,
                 l2_reg_coef=0.0,
                 **kwargs):
        super(LinearizedModelV2, self).__init__(**kwargs)
        self.model = model
        self.train_data = train_data
        self.val_data = val_data
        self.l2_reg_coef = l2_reg_coef

        # copy the parameters at initialization
        self.init_params = copy.deepcopy(dict(model.named_parameters()))
        for k, v in self.init_params.items():
            v.detach_()
            v.requires_grad = False  # to stop training

        # compute all gradients
        self.jacobians = dict()
        jacobian_estimator = JacobianEstimator(projection='none')
        self.jacobians['train'] = jacobian_estimator.compute_jacobian(
            model=model, dataset=train_data, cpu=False)
        if val_data is not None:
            self.jacobians['val'] = jacobian_estimator.compute_jacobian(
                model=model, dataset=val_data, cpu=False)
        for partition in self.jacobians.keys():
            for k, v in self.jacobians[partition].items():
                v.detach_()  # in case some computation graph was built

        # compute predictions at initialization
        self.init_preds = dict()
        self.init_preds['train'] = utils.apply_on_dataset(
            model=model,
            dataset=train_data,
            output_keys_regexp='pred',
            cpu=False)['pred']
        if val_data is not None:
            self.init_preds['val'] = utils.apply_on_dataset(
                model=model,
                dataset=val_data,
                output_keys_regexp='pred',
                cpu=False)['pred']
        for partition in self.init_preds.keys():
            self.init_preds[partition].detach_()  # in case some computation graph was built
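
For context, LinearizedModelV2 presumably implements the usual first-order (NTK-style) linearization f_lin(x) = f_0(x) + J_0(x)·(θ − θ_0), which is why it stores the initial parameters, the Jacobians, and the initial predictions. A rough, self-contained sketch of that expansion for a tiny stand-in model (an illustration of the idea, not this repository's actual forward pass):

import copy
import torch

torch.manual_seed(0)
model = torch.nn.Linear(3, 2)  # tiny stand-in model with 2 outputs
x = torch.randn(3)
theta0 = copy.deepcopy(dict(model.named_parameters()))

# Jacobian of each output w.r.t. every parameter, taken at initialization
f0 = model(x)
jac = {}
for i in range(f0.shape[0]):
    grads = torch.autograd.grad(f0[i], list(model.parameters()), retain_graph=True)
    for (name, _), g in zip(model.named_parameters(), grads):
        jac.setdefault(name, []).append(g)
jac = {k: torch.stack(v) for k, v in jac.items()}  # (n_outputs, *param.shape)

# pretend training moved the parameters a little
with torch.no_grad():
    for p in model.parameters():
        p.add_(0.01 * torch.randn_like(p))

# linearized prediction: f0 + sum over parameter tensors of J · (theta - theta0)
f_lin = f0.detach().clone()
for name, p in model.named_parameters():
    delta = (p - theta0[name]).detach()
    f_lin = f_lin + (jac[name] * delta).flatten(1).sum(dim=1)
print(f_lin, model(x))  # identical here, since a linear model equals its own linearization
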
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', '-d', default='cuda')

    parser.add_argument('--batch_size', '-b', type=int, default=256)
    parser.add_argument('--seed', type=int, default=42)

    parser.add_argument('--dataset', '-D', type=str, default='mnist',
                        choices=['mnist', 'uniform-noise-mnist',
                                 'cifar10', 'uniform-noise-cifar10', 'pair-noise-cifar10',
                                 'cifar100', 'uniform-noise-cifar100',
                                 'clothing1m', 'imagenet'])
    parser.add_argument('--data_augmentation', '-A', action='store_true', dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--error_prob', '-n', type=float, default=0.0)
    parser.add_argument('--clean_validation', dest='clean_validation', action='store_true')
    parser.set_defaults(clean_validation=False)

    parser.add_argument('--load_from', type=str, default=None, required=True)
    parser.add_argument('--output_dir', '-o', type=str, default=None)

    args = parser.parse_args()
    print(args)

    # Load data
    _, _, test_loader, _ = load_data_from_arguments(args)

    print(f"Testing the model saved at {args.load_from}")
    model = utils.load(args.load_from, methods=methods, device=args.device)
    ret = utils.apply_on_dataset(model, test_loader.dataset, batch_size=args.batch_size,
                                 output_keys_regexp='pred|label', description='Testing')
    pred = ret['pred']
    labels = ret['label']
    if args.output_dir is not None:
        with open(os.path.join(args.output_dir, 'test_predictions.pkl'), 'wb') as f:
            pickle.dump({'pred': pred, 'labels': labels}, f)

    accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
    print(accuracy)
    if args.output_dir is not None:
        with open(os.path.join(args.output_dir, 'test_accuracy.txt'), 'w') as f:
            f.write("{}\n".format(accuracy))
def estimate_transition(load_from, data_loader, device='cpu', batch_size=256):
    """ Estimates the label noise matrix. The code is adapted form the original implementation.
    Source: https://github.com/giorgiop/loss-correction/.
    """
    assert load_from is not None
    model = utils.load(load_from, methods=methods, device=device)
    pred = utils.apply_on_dataset(model=model,
                                  dataset=data_loader.dataset,
                                  batch_size=batch_size,
                                  cpu=True,
                                  description="Estimating transition matrix",
                                  output_keys_regexp='pred')['pred']
    pred = torch.softmax(pred, dim=1)
    pred = utils.to_numpy(pred)

    c = model.num_classes
    T = np.zeros((c, c))
    filter_outlier = True

    # find a 'perfect example' for each class
    for i in range(c):
        if not filter_outlier:
            idx_best = np.argmax(pred[:, i])
        else:
            thresh = np.percentile(pred[:, i], 97, interpolation='higher')
            robust_eta = pred[:, i]  # a view into pred; the masking below modifies pred in place, as in the original code
            robust_eta[robust_eta >= thresh] = 0.0
            idx_best = np.argmax(robust_eta)

        for j in range(c):
            T[i, j] = pred[idx_best, j]

    # row normalize
    row_sums = T.sum(axis=1, keepdims=True)
    T /= row_sums

    T = torch.tensor(T, dtype=torch.float).to(device)
    print(T)

    return T
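
The estimated matrix is typically plugged into a forward loss correction (Patrini et al., the repository linked in the docstring). A hedged sketch of how T might then be consumed, assuming T[i, j] ≈ P(noisy label = j | clean label = i) as in the estimation above; the actual corrected loss lives elsewhere in this codebase:

import torch
import torch.nn.functional as F

def forward_corrected_ce(logits, labels, T):
    """Cross-entropy against T-corrected (noisy-label) probabilities."""
    probs = torch.softmax(logits, dim=1)   # estimated clean-label posterior
    noisy_probs = probs @ T                # predicted noisy-label distribution
    return F.nll_loss(torch.log(noisy_probs + 1e-12), labels)

# toy usage with a row-stochastic noise matrix
logits = torch.randn(4, 3)
labels = torch.randint(0, 3, (4,))
T = torch.full((3, 3), 0.1) + 0.7 * torch.eye(3)
print(forward_corrected_ce(logits, labels, T))
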
Example #7
def pred_gradient_norm_histogram(model,
                                 data_loader,
                                 tensorboard,
                                 epoch,
                                 name,
                                 max_num_examples=5000):
    model.eval()
    grad_pred = utils.apply_on_dataset(
        model=model,
        dataset=data_loader.dataset,
        output_keys_regexp='grad_pred',
        description='grad-histogram:grad_pred',
        max_num_examples=max_num_examples)['grad_pred']
    grad_norms = torch.sum(grad_pred**2, dim=-1)
    grad_norms = utils.to_numpy(grad_norms)

    try:
        tensorboard.add_histogram(tag=name,
                                  values=grad_norms,
                                  global_step=epoch)
    except ValueError as e:
        print("Tensorboard histogram error: {}".format(e))
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config',
        '-c',
        type=str,
        default='sample_info/configs/1hidden-mlp-n1024-binary-mnist.json')
    parser.add_argument('--device',
                        '-d',
                        default='cuda',
                        help='specifies the main device')
    parser.add_argument('--seed', type=int, default=42)

    # hyper-parameters
    parser.add_argument('--model_class',
                        '-m',
                        type=str,
                        default='ClassifierL2')

    parser.add_argument('--lr', type=float, default=1e-2, help='Learning rate')
    args = parser.parse_args()
    print(args)

    # Build data
    data_X, data_Y = get_synthetic_data(args.seed)
    half = len(data_X) // 2
    train_data = TensorDataset(
        torch.tensor(data_X[:half]).float(),
        torch.tensor(data_Y[:half]).long().reshape((-1, 1)))
    val_data = TensorDataset(
        torch.tensor(data_X[half:]).float(),
        torch.tensor(data_Y[half:]).long().reshape((-1, 1)))

    with open(args.config, 'r') as f:
        architecture_args = json.load(f)

    model_class = getattr(methods, args.model_class)

    model = model_class(input_shape=train_data[0][0].shape,
                        architecture_args=architecture_args,
                        device=args.device)

    jacobian_estimator = JacobianEstimator(projection='none')
    jacobians = jacobian_estimator.compute_jacobian(model=model,
                                                    dataset=train_data,
                                                    output_key='pred',
                                                    cpu=False)
    # val_jacobians = get_jacobians(model=model, dataset=val_data, output_key='pred', cpu=False)
    init_preds = utils.apply_on_dataset(model=model,
                                        dataset=train_data,
                                        cpu=False)['pred']
    # val_init_preds = utils.apply_on_dataset(model=model, dataset=val_data, cpu=False)['pred']
    init_params = dict(model.named_parameters())
    ntk = compute_ntk(jacobians=jacobians)

    Y = [torch.tensor([y]) for (x, y) in train_data]
    Y = torch.stack(Y).float().to(ntk.device)

    ts = range(0, 1001, 20)
    for idx, t in tqdm(enumerate(ts), desc='main loop', total=len(ts)):
        _, q = weight_stability(t=t,
                                n=len(train_data),
                                eta=args.lr / len(train_data),
                                init_params=init_params,
                                jacobians=jacobians,
                                ntk=ntk,
                                init_preds=init_preds,
                                Y=Y,
                                continuous=False,
                                return_change_vectors=False,
                                scale_by_hessian=False)

        fig, ax = plot(q, data_X=data_X, data_Y=data_Y, half=half, t=t)
        file_path = f'sample_info/plots/synthetic-data/weight-{idx:04d}.png'
        utils.make_path(os.path.dirname(file_path))
        fig.savefig(file_path)
        plt.close()

    # save video
    cur_dir = os.path.abspath(os.curdir)
    os.chdir('sample_info/plots/synthetic-data')
    os.system("ffmpeg -r 2 -i weight-%04d.png movie.webm")
    os.chdir(cur_dir)
Example #9
def prepare_needed_items(model,
                         train_data,
                         test_data=None,
                         projection='none',
                         cpu=False,
                         batch_size=256,
                         **kwargs):
    jacobian_estimator = JacobianEstimator(projection=projection, **kwargs)
    train_jacobians = jacobian_estimator.compute_jacobian(model=model,
                                                          dataset=train_data,
                                                          output_key='pred',
                                                          cpu=cpu)
    test_jacobians = None
    if test_data is not None:
        test_jacobians = jacobian_estimator.compute_jacobian(model=model,
                                                             dataset=test_data,
                                                             output_key='pred',
                                                             cpu=cpu)

    train_init_preds = utils.apply_on_dataset(model=model,
                                              dataset=train_data,
                                              cpu=cpu,
                                              batch_size=batch_size)['pred']
    test_init_preds = None
    if test_data is not None:
        test_init_preds = utils.apply_on_dataset(model=model,
                                                 dataset=test_data,
                                                 cpu=cpu,
                                                 batch_size=batch_size)['pred']

    init_params = dict(model.named_parameters())
    if cpu:
        for k, v in init_params.items():
            init_params[k] = v.to('cpu')

    ntk = compute_ntk(jacobians=train_jacobians)
    lamb, _ = torch.eig(ntk)
    lamb = lamb[:, 0]  # torch.eig returns (real, imag) pairs; keep the real parts
    logging.info(f'Min eigenvalue of NTK: {torch.min(lamb).item():.3f}\t'
                 f'Max eigenvalue of NTK: {torch.max(lamb).item():.3f}')
    if torch.min(lamb).item() < 0:
        logging.warning(
            'The lowest eigenvalue of NTK is negative, consider adding at least small weight decay.'
        )

    test_train_ntk = None
    if test_data is not None:
        test_train_ntk = compute_test_train_ntk(
            train_jacobians=train_jacobians, test_jacobians=test_jacobians)

    def extract_labels(data):
        ys = [
            utils.to_tensor(y, device=ntk.device).view((-1, )) for x, y in data
        ]
        return torch.stack(ys).float()

    train_Y = extract_labels(train_data)
    test_Y = None
    if test_data is not None:
        test_Y = extract_labels(test_data)

    return {
        'jacobian_estimator': jacobian_estimator,
        'train_jacobians': train_jacobians,
        'test_jacobians': test_jacobians,
        'train_init_preds': train_init_preds,
        'test_init_preds': test_init_preds,
        'init_params': init_params,
        'ntk': ntk,
        'test_train_ntk': test_train_ntk,
        'train_Y': train_Y,
        'test_Y': test_Y
    }
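
compute_ntk itself is not shown in these snippets; the standard quantity it presumably computes is the Gram matrix of the per-example Jacobians, Θ = J Jᵀ, summed over parameter tensors. A minimal sketch under that assumption (the Jacobian shapes are hypothetical):

import torch

def compute_ntk_from_jacobians(jacobians):
    # assumes jacobians[name] has shape (n_examples, *param_shape),
    # i.e. a single output dimension as in the binary setups above
    ntk = None
    for jac in jacobians.values():
        flat = jac.reshape(jac.shape[0], -1)  # (n_examples, n_params_in_block)
        block = flat @ flat.t()               # (n_examples, n_examples)
        ntk = block if ntk is None else ntk + block
    return ntk

# toy usage with fake Jacobian blocks for two parameter tensors
jacobians = {'fc.weight': torch.randn(5, 4, 3), 'fc.bias': torch.randn(5, 4)}
print(compute_ntk_from_jacobians(jacobians).shape)  # torch.Size([5, 5])
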
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', type=str, required=True)
    parser.add_argument('--device',
                        '-d',
                        default='cuda',
                        help='specifies the main device')
    parser.add_argument(
        '--all_device_ids',
        nargs='+',
        type=str,
        default=None,
        help=
        "If not None, this list specifies devices for multiple GPU training. "
        "The first device should match with the main device (args.device).")
    parser.add_argument('--batch_size', '-b', type=int, default=256)
    parser.add_argument('--epochs', '-e', type=int, default=400)
    parser.add_argument('--stopping_param', type=int, default=2**30)
    parser.add_argument('--save_iter', '-s', type=int, default=2**30)
    parser.add_argument('--vis_iter', '-v', type=int, default=2**30)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument(
        '--num_accumulation_steps',
        default=1,
        type=int,
        help='Number of training steps to accumulate before updating weights')

    # data parameters
    parser.add_argument('--dataset', '-D', type=str, default='mnist')
    parser.add_argument('--data_augmentation',
                        '-A',
                        action='store_true',
                        dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--error_prob', '-n', type=float, default=0.0)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--clean_validation',
                        action='store_true',
                        default=False)
    parser.add_argument('--resize_to_imagenet',
                        action='store_true',
                        dest='resize_to_imagenet')
    parser.set_defaults(resize_to_imagenet=False)
    parser.add_argument('--cache_dataset',
                        action='store_true',
                        dest='cache_dataset')
    parser.set_defaults(cache_dataset=False)
    parser.add_argument(
        '--sample_ranking_file',
        type=str,
        default=None,
        help=
        'Points to a pickle file that stores an ordering of examples from least to '
        'most important. The most important args.exclude_ratio number of samples '
        'will be excluded from training.')
    parser.add_argument('--exclude_ratio',
                        type=float,
                        default=0.0,
                        help='Fraction of examples to exclude.')
    parser.add_argument('--exclude_side',
                        type=str,
                        default='top',
                        choices=['top', 'bottom'],
                        help='from which side of the order to remove')
    parser.add_argument('--num_workers',
                        type=int,
                        default=0,
                        help='number of workers in data loaders')

    # hyper-parameters
    parser.add_argument('--model_class',
                        '-m',
                        type=str,
                        default='ClassifierL2')

    parser.add_argument('--l2_reg_coef', type=float, default=0.0)
    parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    parser.add_argument('--optimizer',
                        type=str,
                        default='adam',
                        choices=['adam', 'sgd'])
    parser.add_argument('--random_baseline_seed', type=int, default=42)

    parser.add_argument('--output_dir',
                        '-o',
                        type=str,
                        default='sample_info/results/data-summarization/')
    parser.add_argument('--baseline_name', '-B', type=str, required=True)
    parser.add_argument('--exp_name', '-E', type=str, required=True)

    args = parser.parse_args()
    print(args)

    # set tensorboard log directory
    args.log_dir = os.path.join(args.output_dir, args.baseline_name,
                                args.exp_name, 'logs')
    utils.make_path(args.log_dir)

    # Load data
    train_data, val_data, test_data, _ = load_data_from_arguments(
        args, build_loaders=False)

    # exclude samples
    np.random.seed(args.random_baseline_seed)
    order = np.random.permutation(len(train_data))

    # if sample ranking file is given, take the order from there
    if args.sample_ranking_file is not None:
        with open(args.sample_ranking_file, 'rb') as f:
            order = pickle.load(f)

    exclude_count = int(args.exclude_ratio * len(train_data))
    if exclude_count == 0:
        exclude_indices = []
    else:
        if args.exclude_side == 'top':
            exclude_indices = order[-exclude_count:]
        else:
            exclude_indices = order[:exclude_count]

    train_data = SubsetDataWrapper(dataset=train_data,
                                   exclude_indices=exclude_indices)

    if args.cache_dataset:
        train_data = CacheDatasetWrapper(train_data)
        val_data = CacheDatasetWrapper(val_data)
        test_data = CacheDatasetWrapper(test_data)

    shuffle_train = (args.batch_size * args.num_accumulation_steps <
                     len(train_data))
    train_loader, val_loader, test_loader = get_loaders_from_datasets(
        train_data,
        val_data,
        test_data,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        shuffle_train=shuffle_train)

    # Options
    optimization_args = {
        'optimizer': {
            'name': args.optimizer,
            'lr': args.lr,
        }
    }

    with open(args.config, 'r') as f:
        architecture_args = json.load(f)

    model_class = getattr(methods, args.model_class)

    model = model_class(input_shape=train_loader.dataset[0][0].shape,
                        architecture_args=architecture_args,
                        l2_reg_coef=args.l2_reg_coef,
                        device=args.device,
                        seed=args.seed)

    # put the model in always-eval mode, so that if the network has pretrained BatchNorm
    # layers, their running statistics stay fixed
    utils.put_always_eval_mode(model)

    metrics_list = [
        metrics.Accuracy(output_key='pred',
                         one_hot=(train_data[0][1].ndim > 0))
    ]
    if args.dataset == 'imagenet':
        metrics_list.append(metrics.TopKAccuracy(k=5, output_key='pred'))

    stopper = callbacks.EarlyStoppingWithMetric(
        metric=metrics_list[0],
        stopping_param=args.stopping_param,
        partition='val',
        direction='max')

    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=args.epochs,
                   save_iter=args.save_iter,
                   vis_iter=args.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=args.log_dir,
                   args_to_log=args,
                   stopper=stopper,
                   metrics=metrics_list,
                   device_ids=args.all_device_ids,
                   num_accumulation_steps=args.num_accumulation_steps)

    val_preds = utils.apply_on_dataset(model=model,
                                       dataset=val_data,
                                       cpu=True,
                                       partition='val',
                                       batch_size=args.batch_size)['pred']
    val_acc = metrics_list[0].value(epoch=args.epochs - 1, partition='val')

    file_name = f'results-{args.exclude_ratio:.4f}'
    if args.baseline_name == 'random':
        file_name += f'-{args.random_baseline_seed}'
    file_name += '.pkl'
    file_path = os.path.join(args.output_dir, args.baseline_name,
                             args.exp_name, file_name)
    utils.make_path(os.path.dirname(file_path))
    with open(file_path, 'wb') as f:
        pickle.dump({
            'val_preds': val_preds,
            'val_acc': val_acc,
            'args': args
        }, f)
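
The --sample_ranking_file consumed above is just a pickled ordering of training-example indices, from least to most important. A hedged sketch of producing such a file from hypothetical per-example importance scores (the scores themselves would come from the influence/weight-stability tooling in this repository):

import pickle
import numpy as np

scores = np.random.rand(1000)   # hypothetical importance score per training example
order = np.argsort(scores)      # indices from least to most important, as main() expects

with open('sample_ranking.pkl', 'wb') as f:
    pickle.dump(order, f)
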
Example #11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', type=str, required=True)
    parser.add_argument('--device',
                        '-d',
                        default='cuda',
                        help='specifies the main device')
    parser.add_argument(
        '--all_device_ids',
        nargs='+',
        type=str,
        default=None,
        help=
        "If not None, this list specifies devices for multiple GPU training. "
        "The first device should match with the main device (args.device).")
    parser.add_argument('--batch_size', '-b', type=int, default=2**20)
    parser.add_argument('--epochs', '-e', type=int, default=2000)
    parser.add_argument('--stopping_param', type=int, default=2**20)
    parser.add_argument('--save_iter', '-s', type=int, default=2**20)
    parser.add_argument('--vis_iter', '-v', type=int, default=2**20)
    parser.add_argument('--log_dir',
                        '-l',
                        type=str,
                        default='sample_info/logs/junk')
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument(
        '--num_accumulation_steps',
        default=1,
        type=int,
        help='Number of training steps to accumulate before updating weights')

    # data parameters
    parser.add_argument(
        '--dataset',
        '-D',
        type=str,
        default='mnist4vs9',
        choices=[
            'mnist4vs9', 'synthetic', 'cifar10-cat-vs-dog', 'cats-and-dogs'
        ],
        help='Which dataset to use. One can add more choices if needed.')
    parser.add_argument('--data_augmentation',
                        '-A',
                        action='store_true',
                        dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--error_prob', '-n', type=float, default=0.0)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--clean_validation',
                        action='store_true',
                        default=False)
    parser.add_argument('--resize_to_imagenet',
                        action='store_true',
                        dest='resize_to_imagenet')
    parser.set_defaults(resize_to_imagenet=False)
    parser.add_argument('--cache_dataset',
                        action='store_true',
                        dest='cache_dataset')
    parser.set_defaults(cache_dataset=False)
    parser.add_argument('--num_workers',
                        type=int,
                        default=0,
                        help='number of workers in data loaders')
    parser.add_argument('--exclude_index',
                        type=int,
                        default=None,
                        help='Index of an example to remove.')

    # hyper-parameters
    parser.add_argument('--model_class',
                        '-m',
                        type=str,
                        default='ClassifierL2')
    parser.add_argument('--linearized', dest='linearized', action='store_true')
    parser.set_defaults(linearized=False)

    parser.add_argument('--l2_reg_coef', type=float, default=0.0)
    parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    parser.add_argument('--optimizer',
                        type=str,
                        default='sgd',
                        choices=['adam', 'sgd'])

    parser.add_argument(
        '--output_dir',
        '-o',
        type=str,
        default='sample_info/results/ground-truth/ground-truth/')
    parser.add_argument('--exp_name', '-E', type=str, required=True)
    args = parser.parse_args()
    print(args)

    # Build data
    train_data, val_data, test_data, _ = load_data_from_arguments(
        args, build_loaders=False)

    # exclude the example
    if args.exclude_index is not None:
        train_data = SubsetDataWrapper(dataset=train_data,
                                       exclude_indices=[args.exclude_index])

    if args.cache_dataset:
        train_data = CacheDatasetWrapper(train_data)
        val_data = CacheDatasetWrapper(val_data)
        test_data = CacheDatasetWrapper(test_data)

    shuffle_train = (args.batch_size * args.num_accumulation_steps <
                     len(train_data))
    train_loader, val_loader, test_loader = get_loaders_from_datasets(
        train_data,
        val_data,
        test_data,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        shuffle_train=shuffle_train)

    # Options
    optimization_args = {
        'optimizer': {
            'name': args.optimizer,
            'lr': args.lr,
        }
    }

    with open(args.config, 'r') as f:
        architecture_args = json.load(f)

    model_class = getattr(methods, args.model_class)

    model = model_class(input_shape=train_loader.dataset[0][0].shape,
                        architecture_args=architecture_args,
                        l2_reg_coef=args.l2_reg_coef,
                        seed=args.seed,
                        device=args.device)

    # put the model in always-eval mode, so that if the network has pretrained BatchNorm
    # layers, their running statistics stay fixed
    utils.put_always_eval_mode(model)

    if args.linearized:
        print("Using a linearized model")
        model = LinearizedModelV2(model=model,
                                  train_data=train_data,
                                  val_data=val_data,
                                  l2_reg_coef=args.l2_reg_coef)

    if args.dataset == 'synthetic':
        model.visualize = (lambda *args, **kwargs: {}
                           )  # no visualization is needed

    metrics_list = [metrics.Accuracy(output_key='pred')]

    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=args.epochs + 1,
                   save_iter=args.save_iter,
                   vis_iter=args.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=args.log_dir,
                   args_to_log=args,
                   metrics=metrics_list,
                   device_ids=args.all_device_ids,
                   num_accumulation_steps=args.num_accumulation_steps)

    params = dict(model.named_parameters())
    for k in params.keys():
        params[k] = utils.to_cpu(params[k])
    val_preds = utils.apply_on_dataset(model=model,
                                       dataset=val_data,
                                       cpu=True,
                                       partition='val',
                                       batch_size=args.batch_size)['pred']
    val_acc = metrics_list[0].value(epoch=args.epochs, partition='val')

    exp_dir = os.path.join(args.output_dir, args.exp_name)

    # if this is the full-dataset run, save params and val_preds; otherwise they will be compared against the saved weights/predictions
    if args.exclude_index is None:
        file_path = os.path.join(exp_dir, 'full-data-training.pkl')
    else:
        file_path = os.path.join(exp_dir, f'{args.exclude_index}.pkl')

    utils.make_path(os.path.dirname(file_path))
    with open(file_path, 'wb') as f:
        pickle.dump(
            {
                'weights': params,
                'val_preds': val_preds,
                'val_acc': val_acc,
                'args': args
            }, f)
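
The per-example pickles written above are meant to be compared against the full-data run saved in full-data-training.pkl. A hedged sketch of such a comparison (the experiment directory and the distance measures are illustrative, not part of this script):

import os
import pickle

exp_dir = 'sample_info/results/ground-truth/ground-truth/my-exp'  # hypothetical

with open(os.path.join(exp_dir, 'full-data-training.pkl'), 'rb') as f:
    full = pickle.load(f)
with open(os.path.join(exp_dir, '0.pkl'), 'rb') as f:  # run with example 0 excluded
    ablated = pickle.load(f)

# how much the weights and validation predictions moved when example 0 was removed
weight_dist = sum((full['weights'][k] - ablated['weights'][k]).norm() ** 2
                  for k in full['weights']) ** 0.5
pred_dist = (full['val_preds'] - ablated['val_preds']).norm()
print(weight_dist.item(), pred_dist.item())
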
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', type=str, required=True)
    parser.add_argument('--device', '-d', default='cuda')
    parser.add_argument(
        '--all_device_ids',
        nargs='+',
        type=str,
        default=None,
        help=
        "If not None, this list specifies devices for multiple GPU training. "
        "The first device should match with the main device (args.device).")

    parser.add_argument('--batch_size', '-b', type=int, default=256)
    parser.add_argument('--epochs', '-e', type=int, default=400)
    parser.add_argument('--stopping_param', type=int, default=50)
    parser.add_argument('--save_iter', '-s', type=int, default=10)
    parser.add_argument('--vis_iter', '-v', type=int, default=10)
    parser.add_argument('--log_dir', '-l', type=str, default=None)
    parser.add_argument('--seed', type=int, default=42)

    parser.add_argument('--dataset',
                        '-D',
                        type=str,
                        default='mnist',
                        choices=[
                            'mnist', 'uniform-noise-mnist', 'cifar10',
                            'uniform-noise-cifar10', 'pair-noise-cifar10',
                            'cifar100', 'uniform-noise-cifar100', 'clothing1m',
                            'imagenet'
                        ])
    parser.add_argument('--data_augmentation',
                        '-A',
                        action='store_true',
                        dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--error_prob', '-n', type=float, default=0.0)
    parser.add_argument('--clean_validation',
                        dest='clean_validation',
                        action='store_true')
    parser.set_defaults(clean_validation=False)

    parser.add_argument('--model_class',
                        '-m',
                        type=str,
                        default='StandardClassifier')
    parser.add_argument(
        '--loss_function',
        type=str,
        default='ce',
        choices=['ce', 'mse', 'mae', 'gce', 'dmi', 'fw', 'none'])
    parser.add_argument('--loss_function_param', type=float, default=1.0)
    parser.add_argument('--load_from', type=str, default=None)
    parser.add_argument('--grad_weight_decay', '-L', type=float, default=0.0)
    parser.add_argument('--grad_l1_penalty', '-S', type=float, default=0.0)
    parser.add_argument('--lamb', type=float, default=1.0)
    parser.add_argument('--pretrained_arg', '-r', type=str, default=None)
    parser.add_argument('--sample_from_q',
                        action='store_true',
                        dest='sample_from_q')
    parser.set_defaults(sample_from_q=False)
    parser.add_argument('--q_dist',
                        type=str,
                        default='Gaussian',
                        choices=['Gaussian', 'Laplace', 'dot', 'ce'])
    parser.add_argument('--no-detach', dest='detach', action='store_false')
    parser.set_defaults(detach=True)
    parser.add_argument('--warm_up',
                        type=int,
                        default=0,
                        help='Number of epochs to skip before '
                        'starting to train using predicted gradients')
    parser.add_argument('--weight_decay', type=float, default=0.0)

    parser.add_argument(
        '--add_noise',
        action='store_true',
        dest='add_noise',
        help='add noise to the gradients of a standard classifier.')
    parser.set_defaults(add_noise=False)
    parser.add_argument('--noise_type',
                        type=str,
                        default='Gaussian',
                        choices=['Gaussian', 'Laplace'])
    parser.add_argument('--noise_std', type=float, default=0.0)

    parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    args = parser.parse_args()
    print(args)

    # Load data
    train_loader, val_loader, test_loader, _ = load_data_from_arguments(args)

    # Options
    optimization_args = {
        'optimizer': {
            'name': 'adam',
            'lr': args.lr,
            'weight_decay': args.weight_decay
        }
    }

    # optimization_args = {
    #     'optimizer': {
    #         'name': 'sgd',
    #         'lr': 1e-3,
    #     },
    #     'scheduler': {
    #         'step_size': 15,
    #         'gamma': 1.25
    #     }
    # }

    with open(args.config, 'r') as f:
        architecture_args = json.load(f)

    model_class = getattr(methods, args.model_class)

    model = model_class(input_shape=train_loader.dataset[0][0].shape,
                        architecture_args=architecture_args,
                        pretrained_arg=args.pretrained_arg,
                        device=args.device,
                        grad_weight_decay=args.grad_weight_decay,
                        grad_l1_penalty=args.grad_l1_penalty,
                        lamb=args.lamb,
                        sample_from_q=args.sample_from_q,
                        q_dist=args.q_dist,
                        load_from=args.load_from,
                        loss_function=args.loss_function,
                        loss_function_param=args.loss_function_param,
                        add_noise=args.add_noise,
                        noise_type=args.noise_type,
                        noise_std=args.noise_std,
                        detach=args.detach,
                        warm_up=args.warm_up)

    metrics_list = [metrics.Accuracy(output_key='pred')]
    if args.dataset == 'imagenet':
        metrics_list.append(metrics.TopKAccuracy(k=5, output_key='pred'))

    callbacks_list = [
        callbacks.SaveBestWithMetric(metric=metrics_list[0],
                                     partition='val',
                                     direction='max')
    ]

    stopper = callbacks.EarlyStoppingWithMetric(
        metric=metrics_list[0],
        stopping_param=args.stopping_param,
        partition='val',
        direction='max')

    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=args.epochs,
                   save_iter=args.save_iter,
                   vis_iter=args.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=args.log_dir,
                   args_to_log=args,
                   stopper=stopper,
                   metrics=metrics_list,
                   callbacks=callbacks_list,
                   device_ids=args.all_device_ids)

    # if training finishes successfully, compute the test score
    print("Testing the best validation model...")
    model = utils.load(os.path.join(args.log_dir, 'checkpoints',
                                    'best_val.mdl'),
                       methods=methods,
                       device=args.device)
    pred = utils.apply_on_dataset(model,
                                  test_loader.dataset,
                                  batch_size=args.batch_size,
                                  output_keys_regexp='pred',
                                  description='Testing')['pred']
    labels = [p[1] for p in test_loader.dataset]
    labels = torch.tensor(labels, dtype=torch.long)
    labels = utils.to_cpu(labels)
    with open(os.path.join(args.log_dir, 'test_predictions.pkl'), 'wb') as f:
        pickle.dump({'pred': pred, 'labels': labels}, f)

    accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
    with open(os.path.join(args.log_dir, 'test_accuracy.txt'), 'w') as f:
        f.write("{}\n".format(accuracy))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', type=str, required=True)
    parser.add_argument('--device', '-d', default='cuda')
    parser.add_argument(
        '--all_device_ids',
        nargs='+',
        type=str,
        default=None,
        help=
        "If not None, this list specifies devices for multiple GPU training. "
        "The first device should match with the main device (args.device).")

    parser.add_argument('--batch_size', '-b', type=int, default=128)
    parser.add_argument('--epochs', '-e', type=int, default=4000)
    parser.add_argument('--stopping_param', type=int, default=2**30)
    parser.add_argument('--save_iter', '-s', type=int, default=100)
    parser.add_argument('--vis_iter', '-v', type=int, default=10)
    parser.add_argument('--log_dir', '-l', type=str, default=None)
    parser.add_argument('--seed', type=int, default=42)

    parser.add_argument('--dataset',
                        '-D',
                        type=str,
                        default='uniform-noise-cifar10',
                        choices=['uniform-noise-cifar10'])
    parser.add_argument('--data_augmentation',
                        '-A',
                        action='store_true',
                        dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--error_prob', '-n', type=float, default=0.0)
    parser.add_argument('--clean_validation',
                        dest='clean_validation',
                        action='store_true')
    parser.set_defaults(clean_validation=False)

    parser.add_argument('--model_class',
                        '-m',
                        type=str,
                        default='StandardClassifier')
    parser.add_argument('--load_from', type=str, default=None)
    parser.add_argument('--grad_weight_decay', '-L', type=float, default=0.0)
    parser.add_argument('--lamb', type=float, default=1.0)
    parser.add_argument('--pretrained_arg', '-r', type=str, default=None)
    parser.add_argument('--sample_from_q',
                        action='store_true',
                        dest='sample_from_q')
    parser.set_defaults(sample_from_q=False)
    parser.add_argument('--q_dist',
                        type=str,
                        default='Gaussian',
                        choices=['Gaussian', 'Laplace', 'dot'])
    parser.add_argument('--weight_decay', type=float, default=0.0)

    parser.add_argument('--lr', type=float, default=1e-4, help='Learning rate')

    parser.add_argument('--k',
                        '-k',
                        type=int,
                        required=False,
                        default=10,
                        help='width parameter of ResNet18-k')
    parser.add_argument('--exclude_percent', type=float,
                        default=0.0)  # TODO: make this argument work
    args = parser.parse_args()
    print(args)

    # Load data
    train_loader, val_loader, test_loader, _ = load_data_from_arguments(args)

    # Options
    optimization_args = {
        'optimizer': {
            'name': 'adam',
            'lr': args.lr,
            'weight_decay': args.weight_decay
        }
    }

    with open(args.config, 'r') as f:
        architecture_args = json.load(f)

        # set the width parameter k
        if ('classifier' in architecture_args
                and architecture_args['classifier'].get(
                    'net', '').find('double-descent') != -1):
            architecture_args['classifier']['k'] = args.k
        if ('q-network' in architecture_args
                and architecture_args['q-network'].get(
                    'net', '').find('double-descent') != -1):
            architecture_args['q-network']['k'] = args.k

    model_class = getattr(methods, args.model_class)

    model = model_class(input_shape=train_loader.dataset[0][0].shape,
                        architecture_args=architecture_args,
                        pretrained_arg=args.pretrained_arg,
                        device=args.device,
                        grad_weight_decay=args.grad_weight_decay,
                        lamb=args.lamb,
                        sample_from_q=args.sample_from_q,
                        q_dist=args.q_dist,
                        load_from=args.load_from,
                        loss_function='ce')

    metrics_list = [metrics.Accuracy(output_key='pred')]
    if args.dataset == 'imagenet':
        metrics_list.append(metrics.TopKAccuracy(k=5, output_key='pred'))

    callbacks_list = [
        callbacks.SaveBestWithMetric(metric=metrics_list[0],
                                     partition='val',
                                     direction='max')
    ]

    stopper = callbacks.EarlyStoppingWithMetric(
        metric=metrics_list[0],
        stopping_param=args.stopping_param,
        partition='val',
        direction='max')

    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=args.epochs,
                   save_iter=args.save_iter,
                   vis_iter=args.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=args.log_dir,
                   args_to_log=args,
                   stopper=stopper,
                   metrics=metrics_list,
                   callbacks=callbacks_list,
                   device_ids=args.all_device_ids)

    # test the last model and best model
    models_to_test = [{
        'name': 'best',
        'file': 'best_val_accuracy.mdl'
    }, {
        'name': 'final',
        'file': 'final.mdl'
    }]
    for spec in models_to_test:
        print("Testing the {} model...".format(spec['name']))
        model = utils.load(os.path.join(args.log_dir, 'checkpoints',
                                        spec['file']),
                           methods=methods,
                           device=args.device)
        pred = utils.apply_on_dataset(model,
                                      test_loader.dataset,
                                      batch_size=args.batch_size,
                                      output_keys_regexp='pred',
                                      description='Testing')['pred']
        labels = [p[1] for p in test_loader.dataset]
        labels = torch.tensor(labels, dtype=torch.long)
        labels = utils.to_cpu(labels)
        with open(
                os.path.join(args.log_dir,
                             '{}_test_predictions.pkl'.format(spec['name'])),
                'wb') as f:
            pickle.dump({'pred': pred, 'labels': labels}, f)

        accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
        with open(
                os.path.join(args.log_dir,
                             '{}_test_accuracy.txt'.format(spec['name'])),
                'w') as f:
            f.write("{}\n".format(accuracy))