Example #1
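All six snippets below assume a common set of imports. The third-party packages are standard; ModelFactory and get_data are repo-local helpers, and their module paths in this sketch are assumptions:

import numpy as np
import torch
from itertools import product
from joblib import Parallel, delayed
from tqdm import tqdm

# Repo-local helpers (module paths are hypothetical): ModelFactory builds a
# model + trainer from a kwargs dict and exposes train_epoch/state_dict;
# get_data returns three loaders (train, held-out, full-batch).
from model_factory import ModelFactory
from data import get_data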
def bad_boy_vibes(tau=0, activation='tanh', cuda=False, num_epochs=50):
    """
    Trains a single-hidden-layer MLP with 2,000 hidden units on MNIST, sweeping the
    weight-decay strength over [0, 1e-4, 1e-3, 1e-2, 1e-1] with no spectral
    regularizer. The other experiment parameters are fixed: batch_size=2500,
    lr=0.001, number of realizations=3.
    :param tau: integer, at what point in the eigenvalue spectrum should we
        regularize (stored as eig_start; inactive here because regularizer='no')
    :param activation: string, what activation function to use ('tanh' or 'relu')
    :param cuda: boolean, flag indicating whether to use GPU or not
    :param num_epochs: integer, number of epochs to train each model for
    """
    realizations = 3
    batch_size = 2500  # 1.25 times the widest layer in the network
    lr = 1e-3
    train_loader, _, full_loader = get_data(dataset='MNIST', batch_size=batch_size, _seed=0,
                                            validate=False, data_dir='')
    weights = [0, 1e-4, 1e-3, 1e-2, 1e-1]
    for w in weights:
        kwargs = {"dims": [(28 * 28, 2000), (2000, 10)],
                  "activation": activation, "architecture": "mlp",
                  "trainer": "vanilla",
                  "regularizer": "no",
                  'alpha_jacob': 1e-4,
                  'bn': True,
                  'alpha_spectra': 1,
                  'optimizer': 'adam',
                  'lr': lr,
                  'cuda': cuda,
                  'eps': 0.3,
                  'only_last': True,
                  'gradSteps': 40,
                  'noRestarts': 1,
                  'lr_pgd': 1e-2,
                  'training_type': 'FGSM',
                  'slope': [1.00],
                  'eig_start': tau,
                  'weight_decay': w}
        models = [ModelFactory(**kwargs) for j in range(realizations)]
        for j in range(realizations):
            for epoch in tqdm(range(num_epochs)):
                models[j].train_epoch(train_loader)

        model_params = []
        for idx in range(len(models)):
            model_params.append((kwargs, models[idx].state_dict()))

        if w == 0:
            torch.save(model_params,
                       'experiment_1/vanilla' + '_activation=' + activation + '_epochs=' + str(num_epochs))
        else:
            torch.save(model_params,
                       'experiment_1/vanilla' + '_activation=' + activation + '_epochs=' + str(num_epochs) + '_w=' + str(w))
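Each torch.save call above writes a list of (kwargs, state_dict) tuples, one per realization, so a checkpoint can be restored without knowing the configuration in advance. A minimal loader sketch (load_models is hypothetical, and it assumes ModelFactory instances expose load_state_dict, mirroring the state_dict calls above):

def load_models(path):
    # Each file holds [(kwargs, state_dict), ...], one entry per realization.
    models = []
    for kwargs, state_dict in torch.load(path):
        model = ModelFactory(**kwargs)
        model.load_state_dict(state_dict)
        models.append(model)
    return models

# e.g. models = load_models('experiment_1/vanilla_activation=tanh_epochs=50')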
Example #2
def train_network(tau=10,
                  activation='tanh',
                  cuda=False,
                  num_epochs=100,
                  vanilla=False,
                  dataset='MNIST',
                  arch='mlp',
                  realizations=3,
                  flat=False,
                  save_dir='experiment_2/'):
    """
    Training script for the experiments in Section 4.2 of the paper.
    :param tau: integer, at what point in the eigenvalue spectrum should we regularize
    :param activation: string, what activation function to use (only choices are tanh and relu)
    :param cuda: boolean, flag indicating whether to use GPU or not
    :param num_epochs: integer, number of epochs to train model for
    :param vanilla: boolean, flag indicating whether to use spectral regularizer or not
    :param dataset: string, which dataset to use (choices are MNIST or CIFAR10)
    :param arch: string, whether to use an MLP ('mlp') or a CNN ('cnn')
    :param realizations: integer, number of random seeds to use
    :param flat: boolean, flag indicating whether to use the 'flat' variant of the chosen architecture
    :param save_dir: string, location where to save models
    """
    lr = 1e-4
    if arch == 'mlp':
        if flat:
            arch = 'mlp_flat'
        dims = [(28 * 28, 1_000), (1_000, 1_000), (1_000, 1_000), (1_000, 10)]
        batch_size = 1500
        dataset = 'MNIST'
    elif arch == 'cnn':
        if flat:
            arch = 'cnn_flat'
        lr = 1e-4
        if dataset == 'MNIST':
            dims = [2, (1, 16), (16, 32), (800, 1000), (1000, 10)]
            batch_size = 6000
        else:
            dims = [2, (3, 16), (16, 32), (1152, 1000), (1000, 10)]
            batch_size = 6000
    else:
        raise ValueError("Unknown architecture: " + arch)
    train_loader, _, full_loader = get_data(dataset=dataset,
                                            batch_size=batch_size,
                                            _seed=0,
                                            validate=False,
                                            data_dir='data/')

    if vanilla:
        kwargs = {
            "dims": dims,
            "activation": activation,
            "architecture": arch,
            "trainer": "vanilla",
            "regularizer": "no",
            'alpha_jacob': 1e-4,
            'bn': False,
            'alpha_spectra': 1,
            'optimizer': 'adam',
            'lr': lr,
            'weight_decay': 0,
            'cuda': cuda,
            'eps': 0.3,
            'only_last': True,
            'gradSteps': 40,
            'noRestarts': 1,
            'lr_pgd': 1e-2,
            'training_type': 'FGSM',
            'slope': [1.00],
            'eig_start': tau
        }
        models = [ModelFactory(**kwargs) for j in range(realizations)]
        for j in range(realizations):
            for epoch in tqdm(range(num_epochs)):
                models[j].train_epoch(train_loader)

        model_params = []
        for idx in range(len(models)):
            model_params.append((kwargs, models[idx].state_dict()))

        torch.save(
            model_params, save_dir + dataset + '/vanilla_arch=' + arch +
            '_activation=' + activation + '_epochs=' + str(num_epochs))
    else:
        regularizers_strengths = [5., 2., 1.]
        # Load the full training set once; X_full is used to estimate eigenvectors.
        X_full, _ = next(iter(full_loader))

        kwargs = {
            "dims": dims,
            "activation": activation,
            "architecture": arch,
            "trainer": "vanilla",
            "regularizer": "eig",
            'alpha_jacob': 1e-4,
            'bn': False,
            'alpha_spectra': 1.0,
            'optimizer': 'adam',
            'lr': lr,
            'weight_decay': 0,
            'cuda': cuda,
            'eps': 0.3,
            'only_last': True,
            'gradSteps': 40,
            'noRestarts': 1,
            'lr_pgd': 1e-2,
            'training_type': 'FGSM',
            'slope': 1.00,
            'eig_start': tau
        }

        counter = 0
        for reg_strength in regularizers_strengths:
            kwargs['alpha_spectra'] = reg_strength
            models = [ModelFactory(**kwargs) for j in range(realizations)]
            print('no vibes')
            for j in range(realizations):
                for epoch in tqdm(range(num_epochs)):
                    models[j].train_epoch(train_loader, X_full)

            model_params = []
            for idx in range(len(models)):
                model_params.append((kwargs, models[idx].state_dict()))

            torch.save(
                model_params,
                save_dir + dataset + '/tau=' + str(tau) + '_arch=' + arch +
                '_activation=' + activation + '_epochs=' + str(num_epochs) +
                '_alpha=' + str(1) + '_beta=' + str(reg_strength))
            counter += 1
            print(str(len(regularizers_strengths) - counter) + " combos left")
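In the non-vanilla branch, regularizer='eig' is what activates alpha_spectra, slope, and eig_start (tau): train_epoch receives X_full so the trainer can estimate the eigenspectrum of the hidden activations and push its decay toward a power law starting at index tau. The repository's actual loss is not shown on this page; the sketch below is only one plausible shape for such a penalty, and every name in it is hypothetical:

def spectral_penalty(hidden, tau, slope, alpha_spectra):
    """One plausible eigenspectrum penalty: anchor a power law at eigenvalue
    index tau and penalize the tail's deviation from it (illustrative only)."""
    h = hidden - hidden.mean(dim=0, keepdim=True)    # demean activations (N, D)
    cov = h.T @ h / h.shape[0]                       # (D, D) covariance
    eigvals = torch.linalg.eigvalsh(cov).flip(0)     # eigenvalues, descending
    n = torch.arange(1, eigvals.shape[0] + 1,
                     dtype=eigvals.dtype, device=eigvals.device)
    # target spectrum: lambda_n proportional to n^(-slope), anchored at index tau
    target = eigvals[tau] * (n / (tau + 1.0)) ** (-slope)
    return alpha_spectra * torch.sum((eigvals[tau:] - target[tau:]) ** 2)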
Example #3
def bad_boy(tau=10,
            activation='tanh',
            cuda=False,
            num_epochs=100,
            vanilla=False,
            dataset='CIFAR10',
            arch='cnn_flat',
            realizations=3):
    """
    'Flat'-architecture variant of the Section 4.2 training script (compare
    Example #2): trains an MLP on MNIST or a CNN on MNIST/CIFAR10, with
    demean=True added to the model kwargs.
    """
    lr = 1e-4
    if arch == 'mlp_flat':
        dims = [(28 * 28, 1_000), (1_000, 1_000), (1_000, 1_000), (1_000, 10)]
        batch_size = 1500
        dataset = 'MNIST'
    elif arch == 'cnn_flat':
        lr = 1e-4
        if dataset == 'MNIST':
            dims = [2, (1, 16), (16, 32), (800, 1000), (1000, 10)]
            batch_size = 6000
        else:
            dims = [2, (3, 16), (16, 32), (1152, 1000), (1000, 10)]
            batch_size = 6000
    else:
        raise ValueError("Unknown architecture: " + arch)
    train_loader, _, full_loader = get_data(dataset=dataset,
                                            batch_size=batch_size,
                                            _seed=0,
                                            validate=False,
                                            data_dir='data/')

    if vanilla:
        kwargs = {
            "dims": dims,
            "activation": activation,
            "architecture": arch,
            "trainer": "vanilla",
            "regularizer": "no",
            'alpha_jacob': 1e-4,
            'bn': False,
            'alpha_spectra': 1,
            'optimizer': 'adam',
            'lr': lr,
            'weight_decay': 0,
            'cuda': cuda,
            'eps': 0.3,
            'only_last': True,
            'gradSteps': 40,
            'noRestarts': 1,
            'lr_pgd': 1e-2,
            'training_type': 'FGSM',
            'slope': [1.00],
            'eig_start': tau,
            'demean': True
        }
        models = [ModelFactory(**kwargs) for j in range(realizations)]
        for j in range(realizations):
            for epoch in tqdm(range(num_epochs)):
                models[j].train_epoch(train_loader)

        model_params = []
        for idx in range(len(models)):
            model_params.append((kwargs, models[idx].state_dict()))

        torch.save(
            model_params, 'experiment_2/' + dataset + '/flat_vanilla_arch=' +
            arch + '_activation=' + activation + '_epochs=' + str(num_epochs))
    else:
        regularizers_strengths = [5., 2., 1.]
        # Load the full training set once; X_full is used to estimate eigenvectors.
        X_full, _ = next(iter(full_loader))

        kwargs = {
            "dims": dims,
            "activation": activation,
            "architecture": arch,
            "trainer": "vanilla",
            "regularizer": "eig",
            'alpha_jacob': 1e-4,
            'bn': False,
            'alpha_spectra': 1.0,
            'optimizer': 'adam',
            'lr': lr,
            'weight_decay': 0,
            'cuda': cuda,
            'eps': 0.3,
            'only_last': True,
            'gradSteps': 40,
            'noRestarts': 1,
            'lr_pgd': 1e-2,
            'training_type': 'FGSM',
            'slope': 1.00,
            'eig_start': tau,
            'demean': True
        }

        counter = 0
        for reg_strength in regularizers_strengths:
            kwargs['alpha_spectra'] = reg_strength
            models = [ModelFactory(**kwargs) for j in range(realizations)]
            print('no vibes')
            for j in range(realizations):
                for epoch in tqdm(range(num_epochs)):
                    models[j].train_epoch(train_loader, X_full)

            model_params = []
            for idx in range(len(models)):
                model_params.append((kwargs, models[idx].state_dict()))

            torch.save(
                model_params, 'experiment_2/' + dataset + '/flat_tau=' +
                str(tau) + '_arch=' + arch + '_activation=' + activation +
                '_epochs=' + str(num_epochs) + '_alpha=' + str(1) + '_beta=' +
                str(reg_strength))
            counter += 1
            print(str(len(regularizers_strengths) - counter) + " combos left")
Example #4
def bad_boy_vibes(tau=0, activation='tanh', cuda=False, num_epochs=50, parallel=False, vanilla=False, lr=1e-3):
    """
    Trains a single-hidden-layer MLP with 2,000 hidden units on MNIST. The user can
    specify at what point in the eigenvalue spectrum the regularizer starts, aka tau.
    The other experiment parameters are fixed: batch_size=3500, number of
    realizations=3, regularization strengths=[1, 2, 5], slope values=[1.00].
    :param tau: integer, at what point in the eigenvalue spectrum should we regularize
    :param parallel: boolean, whether to train the realizations in parallel via joblib
    :param vanilla: boolean, flag indicating whether to use spectral regularizer or not
    :param lr: float, learning rate to use
    """
    realizations = 3
    batch_size = 3500  # 1.75 times the widest layer in the network
    train_loader, _, full_loader = get_data(dataset='MNIST', batch_size=batch_size, _seed=0,
                                            validate=False, data_dir='')
    if vanilla:
        kwargs = {"dims": [(28 * 28, 2000), (2000, 10)],
                  "activation": activation, "architecture": "mlp",
                  "trainer": "vanilla",
                  "regularizer": "no",
                  'alpha_jacob': 1e-4,
                  'bn': False,
                  'alpha_spectra': 1,
                  'optimizer': 'adam',
                  'lr': lr,
                  'weight_decay': 0,
                  'cuda': cuda,
                  'eps': 0.3,
                  'only_last': True,
                  'gradSteps': 40,
                  'noRestarts': 1,
                  'lr_pgd': 1e-2,
                  'training_type': 'FGSM',
                  'slope': [1.00],
                  'eig_start': tau}
        models = [ModelFactory(**kwargs) for j in range(realizations)]
        for j in range(realizations):
            for epoch in tqdm(range(num_epochs)):
                models[j].train_epoch(train_loader)

        model_params = []
        for idx in range(len(models)):
            model_params.append((kwargs, models[idx].state_dict()))

        torch.save(model_params,
                   'experiment_1/vanilla' + '_activation=' + activation + '_epochs=' + str(num_epochs))

    else:
        slopes = [1.00]
        regularizers_strengths = [1., 2., 5.]
        stuff_to_loop_over = product(slopes, regularizers_strengths)
        # Load the full training set once; only X_full is needed for eigenvectors.
        X_full, _ = next(iter(full_loader))
        del full_loader  # free memory up front; X_full alone is passed to training

        kwargs = {"dims": [(28 * 28, 2000), (2000, 10)],
                  "activation": activation, "architecture": "mlp",
                  "trainer": "vanilla",
                  "regularizer": "eig",
                  'alpha_jacob': 1e-4,
                  'bn': False,
                  'alpha_spectra': 1,
                  'optimizer': 'adam',
                  'lr': lr,
                  'weight_decay': 0,
                  'cuda': cuda,
                  'eps': 0.3,
                  'only_last': True,
                  'gradSteps': 40,
                  'noRestarts': 1,
                  'lr_pgd': 1e-2,
                  'training_type': 'FGSM',
                  'slope': [1.00],
                  'eig_start': tau}

        counter = 0
        for (slope, reg_strength) in stuff_to_loop_over:
            kwargs['slope'] = slope
            kwargs['alpha_spectra'] = reg_strength
            models = [ModelFactory(**kwargs) for j in range(realizations)]
            if not parallel:
                print('no vibes')
                for j in range(realizations):
                    for epoch in tqdm(range(num_epochs)):
                        models[j].train_epoch(train_loader, X_full)
            else:
                print('vibes')
                models = Parallel(n_jobs=realizations)(
                    delayed(train)(models[j], batch_size, num_epochs, X_full)
                    for j in range(realizations))

            model_params = []
            for idx in range(len(models)):
                model_params.append((kwargs, models[idx].state_dict()))

            torch.save(model_params,
                       'experiment_1/tau=' + str(tau) + '_activation=' + activation +
                       '_epochs=' + str(num_epochs) + '_alpha=' + str(slope) +
                       '_beta=' + str(reg_strength))
            counter += 1
            print(str(len(slopes) * len(regularizers_strengths) - counter) + " combos left")
Example #5
def train(model, batch_size, num_epochs, X_full):
    """Train one model realization on MNIST; worker for the joblib parallel branch in Example #4."""
    train_loader, _, _ = get_data(dataset='MNIST', batch_size=batch_size,
                                  _seed=np.random.randint(100),
                                  validate=False, data_dir='')
    for epoch in tqdm(range(num_epochs)):
        model.train_epoch(train_loader, X_full)
    return model
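This is the worker consumed by the parallel branch in Example #4. It rebuilds its own DataLoader inside the worker process (live loaders do not reliably pickle into subprocesses) and draws a fresh seed, so the realizations see different batch orders. For reference, the joblib pattern in isolation:

from joblib import Parallel, delayed

def square(x):
    return x * x

# Parallel pickles the function and its arguments, runs them in worker
# processes, and returns the results in submission order.
results = Parallel(n_jobs=3)(delayed(square)(i) for i in range(3))
print(results)  # [0, 1, 4]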
Example #6
def train_network(tau=10,
                  activation='tanh',
                  cuda=False,
                  num_epochs=50,
                  vanilla=False,
                  lr=1e-3,
                  save_dir='experiment_1/'):
    """
    Trains a single-hidden-layer MLP with 2,000 hidden units on MNIST. The user can
    specify at what point in the eigenvalue spectrum the regularizer starts, aka tau.
    The other experiment parameters are fixed: batch_size=3500, number of
    realizations=3, regularization strengths=[1, 2, 5], slope values=[1.00].
    :param tau: integer, at what point in the eigenvalue spectrum should we regularize
    :param activation: string, what activation function to use (only choices are tanh and relu)
    :param cuda: boolean, flag indicating whether to use GPU or not
    :param num_epochs: integer, number of epochs to train model for
    :param vanilla: boolean, flag indicating whether to use spectral regularizer or not
    :param lr: float, learning rate to use
    :param save_dir: string, location where to save models
    """
    realizations = 3
    batch_size = 3500  # 1.75 times the widest layer in the network
    train_loader, _, full_loader = get_data(dataset='MNIST',
                                            batch_size=batch_size,
                                            _seed=0,
                                            validate=False,
                                            data_dir='')
    if vanilla:
        kwargs = {
            "dims": [(28 * 28, 2000), (2000, 10)],
            "activation": activation,
            "architecture": "mlp",
            "trainer": "vanilla",
            "regularizer": "no",
            'alpha_jacob': 1e-4,
            'bn': False,
            'alpha_spectra': 1,
            'optimizer': 'adam',
            'lr': lr,
            'weight_decay': 0,
            'cuda': cuda,
            'eps': 0.3,
            'only_last': True,
            'gradSteps': 40,
            'noRestarts': 1,
            'lr_pgd': 1e-2,
            'training_type': 'FGSM',
            'slope': [1.00],
            'eig_start': tau
        }
        models = [ModelFactory(**kwargs) for j in range(realizations)]
        for j in range(realizations):
            for epoch in tqdm(range(num_epochs)):
                models[j].train_epoch(train_loader)

        model_params = []
        for idx in range(len(models)):
            model_params.append((kwargs, models[idx].state_dict()))

        torch.save(
            model_params, save_dir + 'vanilla' + '_activation=' + activation +
            '_epochs=' + str(num_epochs))

    else:
        slopes = [1.00]
        regularizers_strengths = [1., 2., 5.]
        stuff_to_loop_over = product(slopes, regularizers_strengths)
        # Load the full training set once; X_full is used to estimate eigenvectors.
        X_full, _ = next(iter(full_loader))

        kwargs = {
            "dims": [(28 * 28, 2000), (2000, 10)],
            "activation": activation,
            "architecture": "mlp",
            "trainer": "vanilla",
            "regularizer": "eig",
            'alpha_jacob': 1e-4,
            'bn': False,  # note: for the results shown in Section 4.1, bn=True was used
            'alpha_spectra': 1,
            'optimizer': 'adam',
            'lr': lr,
            'weight_decay': 0,
            'cuda': cuda,
            'eps': 0.3,
            'only_last': True,
            'gradSteps': 40,
            'noRestarts': 1,
            'lr_pgd': 1e-2,
            'training_type': 'FGSM',
            'slope': [1.00],
            'eig_start': tau
        }

        counter = 0
        for (slope, reg_strength) in stuff_to_loop_over:
            kwargs['slope'] = slope
            kwargs['alpha_spectra'] = reg_strength
            models = [ModelFactory(**kwargs) for j in range(realizations)]
            for j in range(realizations):
                for epoch in tqdm(range(num_epochs)):
                    models[j].train_epoch(train_loader, X_full)

            model_params = []
            for idx in range(len(models)):
                model_params.append((kwargs, models[idx].state_dict()))

            torch.save(
                model_params, save_dir + 'tau=' + str(tau) + '_activation=' +
                activation + '_epochs=' + str(num_epochs) + '_alpha=' +
                str(slope) + '_beta=' + str(reg_strength))
            counter += 1
            print(
                str(len(slopes) * len(regularizers_strengths) - counter) +
                " combos left")