num_workers=0,
                             drop_last=True)

# Generate random samples for test
random_samples = next(iter(dataloader_test))
feature_size = random_samples.size()[1]

###########################
## Privacy Calculation ####
###########################
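# Report the (epsilon, delta)-DP guarantee implied by the dataset size, batch size,
# noise multiplier, and total number of training iterations.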
if opt.dp_privacy:
    totalsamples = len(dataset_train_object)
    num_batches = len(dataloader_train)
    iterations = opt.n_epochs_pretrain * num_batches
    print('Achieves ({}, {})-DP'.format(
        analysis.epsilon(totalsamples, opt.batch_size, opt.noise_multiplier,
                         iterations, opt.delta),
        opt.delta,
    ))


####################
### Architecture ###
####################
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        n_channels_base = 4

        self.encoder = nn.Sequential(
            nn.Conv1d(in_channels=1,
                      out_channels=n_channels_base,
Example No. 2
def train(params):
    dataset = {
        'mimic': mimic_dataset,
        'credit': credit_dataset,
        'census': census_dataset,
    }[params['dataset']]

    _, train_dataset, validation_dataset, _ = dataset.get_datasets()
    x_validation = next(iter(DataLoader(
        validation_dataset, batch_size=len(validation_dataset),
    ))).to(params['device'])

    autoencoder = Autoencoder(
        example_dim=np.prod(train_dataset[0].shape),
        compression_dim=params['compress_dim'],
        binary=params['binary'],
        device=params['device'],
    )

    decoder_optimizer = dp_optimizer.DPAdam(
        l2_norm_clip=params['l2_norm_clip'],
        noise_multiplier=params['noise_multiplier'],
        minibatch_size=params['minibatch_size'],
        microbatch_size=params['microbatch_size'],
        params=autoencoder.get_decoder().parameters(),
        lr=params['lr'],
        betas=(params['b1'], params['b2']),
        weight_decay=params['l2_penalty'],
    )

    encoder_optimizer = torch.optim.Adam(
        params=autoencoder.get_encoder().parameters(),
        lr=params['lr'] * params['microbatch_size'] / params['minibatch_size'],
        betas=(params['b1'], params['b2']),
        weight_decay=params['l2_penalty'],
    )

    # Reconstruction loss: per-example summed BCE for binary features, MSE otherwise.
    autoencoder_loss = (
        (lambda inp, target: nn.BCELoss(reduction='none')(inp, target).sum(dim=1).mean(dim=0))
        if params['binary'] else nn.MSELoss()
    )

    print('Achieves ({}, {})-DP'.format(
        analysis.epsilon(
            len(train_dataset),
            params['minibatch_size'],
            params['noise_multiplier'],
            params['iterations'],
            params['delta']
        ),
        params['delta'],
    ))

    minibatch_loader, microbatch_loader = sampling.get_data_loaders(
        minibatch_size=params['minibatch_size'],
        microbatch_size=params['microbatch_size'],
        iterations=params['iterations'],
    )

    iteration = 0
    train_losses, validation_losses = [], []
    for X_minibatch in minibatch_loader(train_dataset):
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
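        # DP-SGD-style inner loop: gradients are accumulated per microbatch so the
        # DP optimizer can clip each microbatch gradient before noise is added.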
        for X_microbatch in microbatch_loader(X_minibatch):
            X_microbatch = X_microbatch.to(params['device'])
            decoder_optimizer.zero_microbatch_grad()
            output = autoencoder(X_microbatch)
            loss = autoencoder_loss(output, X_microbatch)
            loss.backward()
            decoder_optimizer.microbatch_step()
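        # Only the decoder is updated privately; the encoder takes a plain Adam step.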
        encoder_optimizer.step()
        decoder_optimizer.step()

        validation_loss = autoencoder_loss(autoencoder(x_validation).detach(), x_validation)
        train_losses.append(loss.item())
        validation_losses.append(validation_loss.item())

        if iteration % 100 == 0:
            print('[Iteration %d/%d] [Loss: %f] [Validation Loss: %f]' % (
                iteration, params['iterations'], loss.item(), validation_loss.item())
            )
        iteration += 1

    return autoencoder, pd.DataFrame(data={'train': train_losses, 'validation': validation_losses})
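For reference, a minimal sketch of how this train() function might be invoked; the hyperparameter values below are illustrative assumptions, not values taken from the original code.

# Illustrative call (all values are assumptions):
params = {
    'dataset': 'mimic',
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'compress_dim': 64,
    'binary': True,
    'l2_norm_clip': 1.0,
    'noise_multiplier': 1.1,
    'minibatch_size': 128,
    'microbatch_size': 1,
    'lr': 1e-3,
    'b1': 0.9,
    'b2': 0.999,
    'l2_penalty': 0.0,
    'iterations': 10000,
    'delta': 1e-5,
}
autoencoder, losses = train(params)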
Example No. 3
            pass

        iterations = int(model_file[:-4])

        if private:
            minibatch_size = 128
            noise_multiplier = 1.1
            delta = 1.2871523321606923e-5

            from dp_autoencoder import Autoencoder
            from dp_wgan import Generator
            latent_dim = 64
            generator = torch.load(path + model_file)
            decoder = torch.load('dp_autoencoder.dat').get_decoder()

            epsilon = analysis.epsilon(len(train_dataset), minibatch_size,
                                       noise_multiplier, iterations, delta)

            body = 'N: {}\nb: {}\nSigma: {}\nT: {}\nEps: {}\nDelta: {}'.format(
                len(train_dataset), minibatch_size, noise_multiplier,
                iterations, epsilon, delta)

            with open(model_dir + 'eps.txt', 'w') as f:
                f.write(body)

        else:
            from autoencoder import Autoencoder
            from wgan import Generator
            latent_dim = 128
            generator = torch.load(path + model_file)
            decoder = torch.load('autoencoder.dat').get_decoder()
Example No. 4
def train(params):
    dataset = {
        'mimic': mimic_dataset,
    }[params['dataset']]

    _, train_dataset, _, _ = dataset.get_datasets()

    with open('dp_autoencoder.dat', 'rb') as f:
        autoencoder = torch.load(f)

    decoder = autoencoder.get_decoder()

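    # The generator outputs codes in the autoencoder's compressed space; the
    # pre-trained decoder maps those codes back to the original feature space.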
    generator = Generator(
        input_dim=params['latent_dim'],
        output_dim=autoencoder.get_compression_dim(),
        binary=params['binary'],
        device=params['device'],
    )

    g_optimizer = torch.optim.RMSprop(
        params=generator.parameters(),
        lr=params['lr'],
        alpha=params['alpha'],
        weight_decay=params['l2_penalty'],
    )

    discriminator = Discriminator(
        input_dim=np.prod(train_dataset[0].shape),
        device=params['device'],
    )

    d_optimizer = dp_optimizer.DPRMSprop(
        l2_norm_clip=params['l2_norm_clip'],
        noise_multiplier=params['noise_multiplier'],
        minibatch_size=params['minibatch_size'],
        microbatch_size=params['microbatch_size'],
        params=discriminator.parameters(),
        lr=params['lr'],
        alpha=params['alpha'],
        weight_decay=params['l2_penalty'],
    )

    print('Achieves ({}, {})-DP'.format(
        analysis.epsilon(len(train_dataset), params['minibatch_size'],
                         params['noise_multiplier'], params['iterations'],
                         params['delta']),
        params['delta'],
    ))

    minibatch_loader, microbatch_loader = sampling.get_data_loaders(
        params['minibatch_size'],
        params['microbatch_size'],
        params['iterations'],
    )

    iteration = 0
    for X_minibatch in minibatch_loader(train_dataset):
        d_optimizer.zero_grad()
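        # Critic (discriminator) update: per-microbatch gradients are clipped and
        # noised by DPRMSprop, so only the critic is trained on real records privately.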
        for real in microbatch_loader(X_minibatch):
            real = real.to(params['device'])
            z = torch.randn(real.size(0),
                            params['latent_dim'],
                            device=params['device'],
                            requires_grad=False)
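            # Detach the fake samples so the critic update does not backpropagate
            # into the generator or decoder.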
            fake = decoder(generator(z)).detach()

            d_optimizer.zero_microbatch_grad()
            d_loss = -torch.mean(discriminator(real)) + torch.mean(
                discriminator(fake))
            d_loss.backward()
            d_optimizer.microbatch_step()
        d_optimizer.step()

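        # Standard WGAN weight clipping to keep the critic (approximately) Lipschitz.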
        for parameter in discriminator.parameters():
            parameter.data.clamp_(-params['clip_value'], params['clip_value'])

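        # Update the generator only every `d_updates` critic steps; it never sees
        # real data directly, so its optimizer needs no DP mechanism.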
        if iteration % params['d_updates'] == 0:
            z = torch.randn(X_minibatch.size(0),
                            params['latent_dim'],
                            device=params['device'],
                            requires_grad=False)
            fake = decoder(generator(z))

            g_optimizer.zero_grad()
            g_loss = -torch.mean(discriminator(fake))
            g_loss.backward()
            g_optimizer.step()

        if iteration % 100 == 0:
            print('[Iteration %d/%d] [D loss: %f] [G loss: %f]' %
                  (iteration, params['iterations'], d_loss.item(),
                   g_loss.item()))
        iteration += 1

        if iteration % 1000 == 0:
            with open('dpwgans1/{}.dat'.format(iteration), 'wb') as f:
                torch.save(generator, f)

    return generator
Example No. 5
    else:
        weights.append(1.)
        ds.append((datatype, 1))

weights = torch.tensor(weights).to(ae_params['device'])

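# Per-feature loss weights; only the commented-out weighted losses below use them,
# while the active nn.BCELoss treats all features equally.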
#autoencoder_loss = (lambda input, target: torch.mul(weights, torch.pow(input-target, 2)).sum(dim=1).mean(dim=0))
#autoencoder_loss = lambda input, target: torch.mul(weights, F.binary_cross_entropy(input, target, reduction='none')).sum(dim=1).mean(dim=0)
autoencoder_loss = nn.BCELoss()
#autoencoder_loss = nn.MSELoss()

print(autoencoder)

print('Achieves ({}, {})-DP'.format(
    analysis.epsilon(len(X_train_encoded), ae_params['minibatch_size'],
                     ae_params['noise_multiplier'], ae_params['iterations'],
                     ae_params['delta']),
    ae_params['delta'],
))

minibatch_loader, microbatch_loader = sampling.get_data_loaders(
    minibatch_size=ae_params['minibatch_size'],
    microbatch_size=ae_params['microbatch_size'],
    iterations=ae_params['iterations'],
    nonprivate=ae_params['nonprivate'],
)
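# One loader draws a fresh minibatch for each of the `iterations` training steps;
# the other splits each minibatch into microbatches for per-microbatch clipping.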

train_losses, validation_losses = [], []

X_train_encoded = X_train_encoded.to(ae_params['device'])
X_test_encoded = X_test_encoded.to(ae_params['device'])