Example #1
0
def train(network_class, network_name, epochs, algo, use_saved_model=True):
    """Train a network with the given algorithm, or load a saved checkpoint.

    :param network_class: constructor taking a learning rate (global ``lr``)
    :param network_name: name used to build the checkpoint path
    :param epochs: number of optimization steps to run
    :param algo: object providing ``step(net, data)`` and ``vis(net)``
    :param use_saved_model: if True and a checkpoint exists, load and return it
    :return: the trained (or loaded) network

    NOTE(review): relies on module-level globals ``lr``, ``device``,
    ``batch_size``, ``vis_iter``, ``sample_data`` and ``line``.
    """
    net = network_class(lr).to(device)

    save_path = f'model_params/{network_name}'

    # if a saved model exists, use that
    if use_saved_model:
        try:
            # map_location keeps loading robust when the checkpoint was
            # written on a different device (e.g. trained on GPU, loaded on CPU)
            saved_params = torch.load(save_path, map_location=device)
            net.load_state_dict(saved_params)
            return net
        # if not, train a new one and save it
        except FileNotFoundError:
            pass

    # training loop
    for epoch in range(epochs):
        # Take an optimization step and visualize if necessary
        data = sample_data(batch_size).to(device)
        stats = algo.step(net, data)
        if epoch % vis_iter == vis_iter - 1:
            # a single tuple is one curve; otherwise a collection of curves
            if isinstance(stats, tuple):
                line(epoch, *stats)
            else:
                for stat in stats:
                    line(epoch, *stat)
        if epoch % 500 == 499:
            with torch.no_grad():
                algo.vis(net)

    # save final model
    torch.save(net.state_dict(), save_path)

    return net
Example #2
0
def train(hparams):
    """Fit an energy-based model on sampled 2-D data via contrastive divergence.

    Negative samples are drawn with Langevin dynamics starting from Gaussian
    noise; the trained model object is serialized to ``hparams.model_path``.

    :param hparams: provides ``seed``, ``lr``, ``n_epochs``, ``stepsize``,
        ``n_steps``, ``alpha`` and ``model_path``
    """
    seed_everything(hparams.seed)
    model = mlp(sizes=[2, 100, 100, 1], activation=nn.ReLU)
    optimizer = Adam(model.parameters(), lr=hparams.lr)

    # build the training set
    n_train = 5000
    train_dl = DataLoader(sample_data(n_train), batch_size=100, shuffle=True,
                          num_workers=8)

    loss_history = []
    for _ in range(hparams.n_epochs):
        for batch in train_dl:
            # negatives: short-run Langevin chain initialized from noise
            negatives = sample_langevin(torch.randn_like(batch), model,
                                        hparams.stepsize, hparams.n_steps)

            optimizer.zero_grad()

            energy_pos = model(batch)
            energy_neg = model(negatives)

            # contrastive divergence term plus L2 energy regularizer
            regularizer = hparams.alpha * (energy_pos**2 + energy_neg**2)
            loss = ((energy_pos - energy_neg) + regularizer).mean()
            loss.backward()
            optimizer.step()

            loss_history.append(loss.item())

    print('saving a trained model')
    torch.save(model, hparams.model_path)
Example #3
0
def train(generator, discriminator, step_it, train_loader, val_loader, options,
          g_optimizer, e_optimizer):
    """Progressive-growing VAE training with optional aggressive encoder updates.

    In the "aggressive" schedule (lagging-inference-networks style) most
    iterations update only the encoder and every ``args.aggr_tr_ratio``-th one
    updates only the generator, until the mutual information on the validation
    set stops improving five checks in a row.

    :param generator: decoder network (``.module`` is used when checkpointing)
    :param discriminator: unused here -- kept for signature compatibility
    :param step_it: first progressive step (resolution = 4 * 2**step)
    :param train_loader: base training loader passed to ``sample_data``
    :param val_loader: base validation loader (for the mutual-info check)
    :param options: provides ``num_steps`` and ``n_iters_per_step``
    :param g_optimizer: optimizer for the generator
    :param e_optimizer: optimizer for the (module-level global) encoder

    NOTE(review): relies on module-level globals ``encoder``, ``args``,
    ``device``, ``writer``, ``random_seed`` and, when resuming, a loaded
    ``checkpoint`` dict -- confirm these exist at call time.
    """
    # alpha ramps linearly from 0 to 1 across one progressive step
    alpha_step = 1. / (float(options.n_iters_per_step) * 1.)

    aggressive_flag = args.aggressive

    mse_criterion = nn.MSELoss()
    mse_criterion.to(device)
    util.requires_grad(generator, True)
    util.requires_grad(encoder, True)
    generator.train()
    encoder.train()

    pre_mi = best_mi = mi_not_improved = 0
    starting_it = n_epochs = 0

    # resume bookkeeping state from a checkpoint if one was given
    if args.checkpoint is not None:
        starting_it = checkpoint['iter']
        n_epochs = checkpoint['n_epochs']
        pre_mi = checkpoint['pre_mi']
        best_mi = checkpoint['best_mi']
        mi_not_improved = checkpoint['mi_not_improved']
        prev_aggressive_flag = checkpoint['aggressive_flag']
        if (prev_aggressive_flag != aggressive_flag):
            # the CLI flag wins over the value stored in the checkpoint
            print("overwriting aggressive flag from checkpoint state")

    for step in range(step_it, step_it + options.num_steps):
        # image resolution doubles with every progressive step (4, 8, 16, ...)
        tr_data_loader = sample_data(train_loader, 4 * 2**step)
        # BUG FIX: was `valid_loader` (undefined name); the parameter is
        # `val_loader`, so the original raised NameError here
        valid_data_loader = sample_data(val_loader, 4 * 2**step)
        dataset = iter(tr_data_loader)
        # keep one fixed batch to visualize reconstructions over time
        fixed_image, label = next(dataset)
        utils.save_image(fixed_image,
                         'sample/' + str(step) + '_fixed.png',
                         nrow=5,
                         normalize=True,
                         range=(-1, 1))

        for i in range(starting_it, options.n_iters_per_step):
            # global iteration counter across steps (used for logging)
            n_iters = (step - step_it) * options.n_iters_per_step + i
            encoder.zero_grad()
            generator.zero_grad()
            g_optimizer.zero_grad()
            e_optimizer.zero_grad()

            # fade-in coefficient for the newly added resolution block
            alpha = min(1, alpha_step * float(i))

            try:
                real_image, label = next(dataset)
            except (OSError, StopIteration):
                # restart the iterator when the epoch is exhausted
                dataset = iter(tr_data_loader)
                real_image, label = next(dataset)

            b_size = real_image.size(0)
            real_image = real_image.to(device)

            # VAE forward pass: encode, then reconstruct
            z, kld = encoder(real_image, step=step, alpha=alpha)
            reconstructed_image = generator(z, step=step, alpha=alpha)

            # beta-VAE objective: reconstruction MSE + weighted KL term
            mse = mse_criterion(real_image, reconstructed_image)
            loss = mse + args.beta * kld.mean()
            loss.backward()

            if aggressive_flag:
                # aggressive phase: mostly encoder-only updates
                if not (n_iters % args.aggr_tr_ratio == 0):
                    e_optimizer.step()
                else:
                    g_optimizer.step()
            else:
                e_optimizer.step()
                g_optimizer.step()

            # NOTE(review): this compares iterations against the dataset
            # *size*, not the number of batches -- confirm intended
            if (n_iters % len(tr_data_loader.dataset)) == 0:
                if n_iters != 0:
                    n_epochs += 1

            # periodically evaluate mutual information to decide whether to
            # stop the aggressive phase
            if aggressive_flag and (n_iters % args.mi_valid_count) == 0:
                with torch.no_grad():
                    cur_mi = util.calc_mutual_info(encoder, valid_data_loader,
                                                   step, alpha, device)

                if cur_mi - best_mi < 0:
                    mi_not_improved += 1
                    if mi_not_improved == 5:
                        aggressive_flag = False
                        print("STOP BURNING")
                else:
                    best_mi = cur_mi

                pre_mi = cur_mi

            print(
                "step: %d, itr: %d, mse: %.3f, kld: %.3f, loss: %.3f, mutual_info: %.3f"
                % (step, i, mse, kld.mean(), loss, pre_mi))
            writer.add_scalar('data/mse', mse, n_iters)
            writer.add_scalar('data/kld', kld.mean(), n_iters)
            writer.add_scalar('data/loss', loss, n_iters)
            writer.add_scalar('data/mutual_info', pre_mi, n_epochs)
            writer.add_scalar('params/alpha', alpha, n_iters)
            writer.add_scalar('params/step_itr', step, n_iters)
            writer.add_scalar('params/image_size', 4 * 2**step, n_iters)
            writer.add_scalar('params/n_epochs', n_epochs, n_iters)

            # periodically reconstruct the fixed batch for visual inspection
            if (i + 1) % 1000 == 0:
                with torch.no_grad():
                    z_f, kld_f = encoder(fixed_image, step=step, alpha=alpha)
                    reconstructed_fixed = generator(z_f,
                                                    step=step,
                                                    alpha=alpha)
                    utils.save_image(reconstructed_fixed,
                                     'sample/' + str(step) + '_' +
                                     str(i + 1).zfill(6) +
                                     '_reconstructed.png',
                                     nrow=5,
                                     normalize=True,
                                     range=(-1, 1))

            # periodic checkpoint of model, optimizer and schedule state
            if (i + 1) % 10000 == 0:
                torch.save(
                    {
                        'step': step,
                        'iter': i,
                        'n_epochs': n_epochs,
                        'random_seed': random_seed,
                        'valid_size': args.valid_size,
                        'aggressive_flag': aggressive_flag,
                        'pre_mi': pre_mi,
                        'best_mi': best_mi,
                        'mi_not_improved': mi_not_improved,
                        'generator_state_dict': generator.module.state_dict(),
                        'encoder_state_dict': encoder.module.state_dict(),
                        'g_optimizer_state_dict': g_optimizer.state_dict(),
                        'e_optimizer_state_dict': e_optimizer.state_dict()
                    }, 'checkpoint/' + str(step) + '_' + str(i + 1).zfill(6) +
                    '.model')

        # the resume offset applies only to the first step
        starting_it = 0
def train(generator, discriminator, step_it, loader, options, g_optimizer, e_optimizer, starting_it = 0 ):
    """Progressive-growing VAE training loop (simple variant, no aggressive phase).

    :param generator: decoder network (``.module`` is used when checkpointing)
    :param discriminator: unused here -- kept for signature compatibility
    :param step_it: first progressive step (resolution = 4 * 2**step)
    :param loader: base dataset loader passed to ``sample_data``
    :param options: provides ``num_steps`` and ``n_iters_per_step``
    :param g_optimizer: optimizer for the generator
    :param e_optimizer: optimizer for the (module-level global) encoder
    :param starting_it: iteration to resume from within the first step

    NOTE(review): relies on module-level globals ``encoder``, ``args``,
    ``device``, ``writer`` and helpers ``sample_data``/``util``/``utils`` --
    confirm they are defined before calling.
    """

    # alpha ramps linearly from 0 to 1 across one progressive step
    alpha_step = 1. / ( float(options.n_iters_per_step) * 1.)

    mse_criterion = nn.MSELoss()
    mse_criterion.to(device)
    # ensure both networks receive gradients
    util.requires_grad(generator, True)
    util.requires_grad(encoder, True)
    generator.train()
    encoder.train()

    for step in range( step_it, step_it + options.num_steps ):
        # image resolution doubles with every progressive step (4, 8, 16, ...)
        data_loader = sample_data(loader, 4 * 2 ** step )
        dataset = iter(data_loader)
        # keep one fixed batch to visualize reconstructions over time
        fixed_image, label = next(dataset)
        utils.save_image(
            fixed_image,
            'sample/'+str(step)+'_fixed.png',
            nrow=3,
            normalize=True,
            range=(-1, 1))
        for i in range(starting_it, options.n_iters_per_step ):
            # global iteration counter across steps (used for logging)
            n_iters = (step - step_it) * options.n_iters_per_step + i
            encoder.zero_grad()
            generator.zero_grad()
            g_optimizer.zero_grad()
            e_optimizer.zero_grad()

            # fade-in coefficient for the newly added resolution block
            alpha = min( 1, alpha_step * float(i) )

            try:
                real_image, label = next(dataset)
            except (OSError, StopIteration):
                # restart the iterator when the epoch is exhausted
                dataset = iter(data_loader)
                real_image, label = next(dataset)

            b_size = real_image.size(0)
            real_image = real_image.to(device)

            # VAE forward pass: encode, then reconstruct
            z, kld = encoder( real_image, step=step, alpha=alpha)
            reconstructed_image = generator(z, step=step, alpha=alpha)

            # beta-VAE objective: reconstruction MSE + weighted KL term
            mse = mse_criterion( real_image, reconstructed_image )
            loss = mse + args.beta * kld.mean()
            loss.backward()
            e_optimizer.step()
            g_optimizer.step()
            print("step: %d, itr: %d, mse: %f, kld: %f, loss total: %f" % (step, i, mse, kld.mean(), loss) )
            writer.add_scalar('data/mse', mse, n_iters)
            writer.add_scalar('data/kld', kld.mean(), n_iters)
            writer.add_scalar('data/loss', loss, n_iters)
            writer.add_scalar('params/alpha', alpha, n_iters)
            writer.add_scalar('params/step_itr', step, n_iters)
            writer.add_scalar('params/image_size', 4 * 2 ** step, n_iters)

            # periodically reconstruct the fixed batch for visual inspection
            if (i + 1) % 1000 == 0:
                with torch.no_grad():
                    z_f, kld_f = encoder( fixed_image, step=step, alpha=alpha)
                    reconstructed_fixed = generator(z_f, step=step, alpha=alpha)
                    utils.save_image(
                    reconstructed_fixed,
                    'sample/'+str(step)+'_'+str(i + 1).zfill(6)+'_reconstructed.png',
                    nrow=3,
                    normalize=True,
                    range=(-1, 1))

            # periodic checkpoint of model and optimizer state
            if (i + 1) % 10000 == 0:
                torch.save({
                 'step': step,
                 'iter' : i,
                 'generator_state_dict':   generator.module.state_dict(),
                 'encoder_state_dict':   encoder.module.state_dict(),
                 'g_optimizer_state_dict': g_optimizer.state_dict(),
                 'e_optimizer_state_dict': e_optimizer.state_dict()
                }, 'checkpoint/'+str(step)+'_'+str(i + 1).zfill(6)+'.model')

        # the resume offset applies only to the first step
        starting_it = 0
Example #5
0
def train(args,
          dataset,
          generator,
          g_running,
          discriminator,
          mask_loss_fn,
          logger,
          log_dir,
          step=None,
          gen_every=100):
    """Progressive-growing GAN training loop (WGAN-GP) for a mask-based generator.

    Alternates critic updates with (every ``n_critic``-th iteration) generator
    updates, growing the resolution once ``args.phase * 2`` samples have been
    consumed at the current size. Periodically logs metrics, renders a fixed
    noise grid from both ``generator`` and the averaged copy ``g_running``,
    and checkpoints to ``log_dir``.

    :param args: hyperparameters (``batch``, ``lr``, ``phase``, ``loss``,
        ``mixing``, ``init_size``, ``max_size``, ...)
    :param dataset: dataset handed to ``sample_data``
    :param generator: generator being optimized (DataParallel-wrapped;
        ``.module`` is accessed for the perturber and for ``accumulate``)
    :param g_running: running-average copy of the generator
    :param discriminator: critic network
    :param mask_loss_fn: callable returning ``(loss, dict)`` for the fg mask
    :param logger: metrics/image logger
    :param log_dir: directory for checkpoints
    :param step: progressive step to start at; derived from ``args.init_size``
        when None
    :param gen_every: log generated image grids every this many iterations

    NOTE(review): relies on module-level globals ``g_optimizer``,
    ``d_optimizer``, ``code_size``, ``n_critic``, ``device`` and helpers
    ``sample_data``/``adjust_lr``/``accumulate``/``save`` -- confirm they are
    initialized before calling.
    """
    if step is None:
        step = int(math.log2(args.init_size)) - 2
    resolution = 4 * 2**step
    loader = sample_data(dataset,
                         args.batch.get(resolution, args.batch_default),
                         resolution,
                         num_workers=args.num_workers,
                         org_to_crop=args.org_to_crop,
                         shuffle=True,
                         drop_last=False)
    data_loader = iter(loader)

    # learning rates are resolution-dependent
    adjust_lr(g_optimizer, args.lr.get(resolution, 0.001))
    adjust_lr(d_optimizer, args.lr_disc_mult * args.lr.get(resolution, 0.001))

    pbar = tqdm(range(3_000_000))

    requires_grad(generator, False)
    requires_grad(discriminator, True)

    disc_loss_val = 0
    gen_loss_val = 0
    grad_loss_val = 0

    used_sample = args.used_sample

    # flags telling which generator outputs (bg, fg/mask) are perturbed
    perturbed_outputs = generator.module.perturber.perturbs()

    gan_sampler_ = NormalNoiseSampler()
    # BUG FIX: the lambda previously ignored its own argument and closed over
    # the later-assigned loop variable `b_size`; bind the parameter instead so
    # the requested batch size is honored.
    gan_sampler = lambda bsize: gan_sampler_(bsize, code_size)
    real_samples = get_first_n_images(loader, 64)
    logger.log_images(real_samples, tag='real_samples', step=0, epoch=step)
    # fixed noise grid (gen_i batches of gen_j codes) for visualization
    gen_i, gen_j = 8, 8
    fixed_noise = [
        torch.randn(gen_j, code_size).to(device) for _ in range(gen_i)
    ]

    for i in pbar:
        d_optimizer.zero_grad()

        # fade-in coefficient for the newest resolution block
        alpha = min(1., 1. / args.phase *
                    (used_sample + 1)) if resolution != args.init_size else 1.

        # grow to the next resolution once enough samples were consumed
        if used_sample > args.phase * 2:
            step += 1
            save(f'{log_dir}/train_step-{step}.model', generator, g_running,
                 discriminator, g_optimizer, d_optimizer, alpha, step - 1)

            if step > int(math.log2(args.max_size)) - 2:
                break
            else:
                alpha = 0
                used_sample = 0

            resolution = 4 * 2**step

            loader = sample_data(
                dataset,
                args.batch.get(resolution, args.batch_default),
                resolution,
                num_workers=args.num_workers,
                org_to_crop=args.org_to_crop,
                drop_last=False,
            )
            log_real_images(loader, logger, i, step)

            data_loader = iter(loader)
            adjust_lr(g_optimizer, args.lr.get(resolution, 0.001))
            adjust_lr(d_optimizer,
                      args.lr_disc_mult * args.lr.get(resolution, 0.001))

        # Discriminator - real images
        try:
            real_image, label = next(data_loader)
        except (OSError, StopIteration):
            # restart the iterator when the epoch is exhausted
            data_loader = iter(loader)
            real_image, label = next(data_loader)

        used_sample += real_image.shape[0]

        b_size = real_image.size(0)
        real_image = real_image.to(device)

        metrics = {}
        if args.loss == 'wgan-gp':
            loss = torch.tensor(0.0, device=device)
            real_predict = discriminator(real_image, step=step, alpha=alpha)
            real_predict_mean = real_predict.mean()
            dx = real_predict_mean.item()

            # drift term keeps critic outputs from growing unboundedly
            real_predict = real_predict_mean - args.real_penalty * (
                real_predict**2).mean()
            loss -= real_predict
            loss_ = loss.item()
            loss.backward()
            metrics['Dstep_D_x'] = dx
            metrics['lossD_real'] = loss_

        elif args.loss == 'r1':
            raise NotImplementedError

        # Discriminator - fake images
        mixing_range = (-1, -1)
        if args.mixing and random.random() < 0.9:
            # style mixing: two latent codes per sample
            gen_in11, gen_in12, gen_in21, gen_in22 = torch.randn(
                4, b_size, code_size, device=device).chunk(4, 0)
            gen_in1 = [gen_in11.squeeze(0), gen_in12.squeeze(0)]
            gen_in2 = [gen_in21.squeeze(0), gen_in22.squeeze(0)]
            if args.same_mixing:
                mixing_range = (random.sample(list(range(step)), 1)[0], 100)
        else:
            gen_in1, gen_in2 = gan_sampler(b_size).to(device), gan_sampler(
                b_size).to(device)

        fake_image = generator(gen_in1,
                               step=step,
                               alpha=alpha,
                               mixing_range=mixing_range)[0]
        fake_d_input = fake_image
        fake_predict = discriminator(fake_d_input, step=step, alpha=alpha)

        if args.loss == 'wgan-gp':
            fake_predict = fake_predict.mean()
            fake_predict.backward()
            # gradient penalty on random interpolates between real and fake
            eps = torch.rand(fake_image.size(0), 1, 1, 1).to(device)
            x_hat = eps * real_image.data + (1 - eps) * fake_d_input.data
            x_hat.requires_grad = True
            hat_predict = discriminator(x_hat, step=step, alpha=alpha)
            grad_x_hat = grad(outputs=hat_predict.sum(),
                              inputs=x_hat,
                              create_graph=True)[0]
            grad_penalty = (
                (grad_x_hat.view(grad_x_hat.size(0), -1).norm(2, dim=1) -
                 1)**2).mean()
            grad_penalty = 10 * grad_penalty
            grad_penalty.backward()
            grad_loss_val = grad_penalty.item()

            metrics['Dstep_D_Gz'] = fake_predict.item()
            metrics['lossD_fake'] = fake_predict.item()
            # real_predict holds the drift-penalized real term at this point
            metrics['lossD'] = fake_predict.item() - real_predict.item()
            metrics['grad_penalty'] = grad_loss_val

        elif args.loss == 'r1':
            raise NotImplementedError

        disc_loss_val = metrics['lossD']
        d_optimizer.step()
        d_optimizer.zero_grad()

        # Generator update (every n_critic-th iteration)
        if i % n_critic == 0:
            g_optimizer.zero_grad()
            loss = torch.tensor(0.0, device=device)

            requires_grad(generator, True)
            requires_grad(discriminator, False)

            rendered, perturbed, X = generator(gen_in2,
                                               step=step,
                                               alpha=alpha,
                                               mixing_range=mixing_range)
            fake_d_input = rendered

            fake_predict = discriminator(fake_d_input, step=step, alpha=alpha)
            predict = fake_predict
            predict_mean = predict.mean()

            if args.loss == 'wgan-gp':
                loss -= predict_mean
            elif args.loss == 'r1':
                raise NotImplementedError

            # Mask loss
            mask = perturbed[1][1]
            mask_loss, mask_loss_dict = mask_loss_fn(mask)

            gen_loss_val = loss.item()
            loss += mask_loss

            metrics['Gstep_D_Gz'] = predict_mean.item()
            metrics['lossG'] = loss.item()
            metrics['lossG_fake'] = gen_loss_val
            metrics['min_mask_loss'] = mask_loss_dict['min_mask_loss'].item()
            metrics['bin_loss'] = mask_loss_dict['bin_loss'].item()

            loss.backward()
            g_optimizer.step()
            # update the running-average generator
            accumulate(g_running, generator.module)

            requires_grad(generator, False)
            requires_grad(discriminator, True)

        logger.log_metrics(metrics, i)

        # periodically render the fixed-noise grid from both generators
        if i % gen_every == 0:
            g_optimizer.zero_grad()
            generator.eval()
            img_keys = ['rendered', 'bg']
            if perturbed_outputs[0]:
                img_keys.append('bg_perturbed')

            img_keys.append('mask')
            img_keys.append('fg')
            img_keys.append('fgmask')
            if perturbed_outputs[1]:
                img_keys.append('mask_perturbed')
                img_keys.append('fg_perturbed')
                img_keys.append('fgmask_perturbed')
            img_keys.extend([ik + '_running' for ik in img_keys])

            img_dict = {img_key: [] for img_key in img_keys}
            with torch.no_grad():
                for fnoise in fixed_noise:
                    rendered, perturbed, X = generator(fnoise,
                                                       step=step,
                                                       alpha=alpha)

                    img_dict['rendered'].append(rendered.data.cpu())
                    img_dict['bg'].append(X[0].data.cpu())
                    fg, mask = X[1]

                    img_dict['fg'].append(fg.data.cpu())
                    img_dict['mask'].append(mask.data.cpu())
                    img_dict['fgmask'].append((fg * mask).data.cpu())

                    if perturbed_outputs[0]:
                        img_dict['bg_perturbed'].append(
                            perturbed[0].data.cpu())

                    if perturbed_outputs[1]:
                        fg, mask = perturbed[1]
                        img_dict['fg_perturbed'].append(fg.data.cpu())
                        img_dict['mask_perturbed'].append(mask.data.cpu())
                        img_dict['fgmask_perturbed'].append(
                            (fg * mask).data.cpu())

                    # same grid from the running-average generator
                    rendered, perturbed, X = g_running(fnoise,
                                                       step=step,
                                                       alpha=alpha)

                    img_dict['rendered_running'].append(rendered.data.cpu())
                    img_dict['bg_running'].append(X[0].data.cpu())
                    fg, mask = X[1]

                    img_dict['fg_running'].append(fg.data.cpu())
                    img_dict['mask_running'].append(mask.data.cpu())
                    img_dict['fgmask_running'].append((fg * mask).data.cpu())

                for key, imgs in img_dict.items():
                    if len(imgs) == 0:
                        continue
                    # masks live in [0, 1]; everything else in [-1, 1]
                    range_ = (0., 1.) if key.startswith('mask') else (-1., 1.)
                    logger.log_images(torch.cat(imgs, 0),
                                      i,
                                      step,
                                      key,
                                      range=range_)
            generator.train()

        if i % 10000 == 0:
            save(f'{log_dir}/train_step-{step}_{i}.model', generator,
                 g_running, discriminator, g_optimizer, d_optimizer, alpha,
                 step)

        state_msg = (
            f'Size: {4 * 2 ** step}; G: {gen_loss_val:.3f}; D: {disc_loss_val:.3f};'
            f' Grad: {grad_loss_val:.3f}; Alpha: {alpha:.5f}')

        pbar.set_description(state_msg)
Example #6
0
            if isinstance(stats, tuple):
                line(epoch, *stats)
            else:
                for stat in stats:
                    line(epoch, *stat)
        if epoch % 500 == 499:
            with torch.no_grad():
                algo.vis(net)

    # save final model
    torch.save(net.state_dict(), save_path)

    return net


#################
# FULL PIPELINE #
#################

# plot a large sample of the data (bad points included)
scatter(sample_data(500))

# train the VAE; the other model variants are kept here disabled
# ae = train(AutoEncoder, 'ae', 20000, algos.ae.AeAlgo, use_saved_model=True)
vae = train(VariationalAutoEncoder,
            'vae',
            20000,
            algos.vae.VaeAlgo,
            use_saved_model=True)
# gan = train(Gan, 'gan', 20000, algos.gan.GanAlgo, use_saved_model=True)
# wgan = train(WGan, 'wgan', 20000, algos.wgan.WganAlgo, use_saved_model=True)
def ensemble_learning(sample_size=1800,
                      learners=8,
                      generations=8,
                      pop_size=500,
                      mut_rate=0.3,
                      cross_rate=0.6,
                      fit_weights=None,
                      max_depth=16,
                      cross_md=9,
                      multi_proc=False,
                      use_all_labels=False):
    """
    Ensemble learning algorithm: trains ``learners`` GP decision trees on
    sampled subsets and combines their test predictions by majority vote.

    :param sample_size: data sampling size per learner, defaults to 1800
    :type sample_size: int, optional
    :param learners: number of GP learners, defaults to 8
    :type learners: int, optional
    :param generations: number of generations, defaults to 8
    :type generations: int, optional
    :param pop_size: number of trees in parent group, defaults to 500
    :type pop_size: int, optional
    :param mut_rate: mutation rate, defaults to 0.3
    :type mut_rate: float, optional
    :param cross_rate: crossover rate, defaults to 0.6
    :type cross_rate: float, optional
    :param fit_weights: importance for all classifications + depth penalty, defaults to None
    :type fit_weights: list, optional
    :param max_depth: maximum depth of a tree, defaults to 16
    :type max_depth: int, optional
    :param cross_md: maximum depth of subtree to combine with child, defaults to 9
    :type cross_md: int, optional
    :param multi_proc: multiprocessing, defaults to False
    :type multi_proc: bool, optional
    :param use_all_labels: forwarded to ``sample_data``; presumably forces
        every label to appear in each sample -- confirm against ``sample_data``
    :type use_all_labels: bool, optional
    :return: correctness rates, one per class (index 0: TNR, index 1: TPR)
    :rtype: list
    """
    # None as default, due to danger of modifying default params
    if fit_weights is None:
        fit_weights = [0.5, 0.5, 0.]

    train, test = train_test_data(DATA)

    # one sampled training subset per learner
    dfs = sample_data(train, learners, sample_size, use_all_labels)

    # Specify seed, otherwise could go wrong using multiple threads
    params = [(data.drop(columns=LABEL), CATS, data[LABEL], generations,
               pop_size, mut_rate, cross_rate, fit_weights, max_depth,
               cross_md, False, r.randrange(sys.maxsize)) for data in dfs]
    res = []
    if multi_proc:
        with Pool() as pool:
            res = pool.starmap(dt_gp, tqdm(params, total=len(params)))
    else:
        for func_param in tqdm(params):
            res.append(dt_gp(*func_param))

    labels = test[LABEL].to_numpy()

    # majority vote across the per-learner classifications
    # NOTE(review): `np.array(stats.mode(...))[0]` relies on the legacy
    # scipy ModeResult layout -- confirm against the installed scipy version
    res_class = np.array([tree.classify(test) for tree in tqdm(res)])
    ensemble = np.array(stats.mode(res_class))[0]

    # per-class accuracy: correct predictions / true labels of that class
    # NOTE(review): divides by len(labels[labels == i]) -- raises
    # ZeroDivisionError if a class is absent from the test set; confirm
    equal = ensemble[ensemble == labels]
    result = []
    for i in range(len(fit_weights) - 1):
        result.append(len(equal[equal == i]) / len(labels[labels == i]))

    print(
        f"FINAL SCORE: TPR: {result[1]} and TNR: {result[0]} out of {len(labels)} labels used"
    )
    return result