Example #1
def demo_mnist(opt, epochs=10, btsz=100,
               lr=0.1, beta=0.9,
               eta0=0.0005, mu=0.02, lmbd=0.99,
               w=None):
    """
    """
    from misc import load_mnist
    from losses import zero_one
    from opt import msgd, smd
    #
    trainset, valset, testset = load_mnist()
    inputs, targets = trainset
    test_in, test_tar = testset
    #
    di = inputs.shape[1]
    dt = np.max(targets) + 1
    # setup weights
    if w is None:
        if opt is smd:
            # smd needs complex-valued weights
            weights = np.zeros(di*dt + dt, dtype=complex)
            weights[:] = 0.001 * np.random.randn(di*dt + dt)
        else:
            # starting from zeros is fine for a linear model
            weights = np.zeros(di*dt + dt)
        weights[-dt:] = 0.
    else:
        print "Continue with provided weights w."
        weights = w
    #
    print "Training starts..."
    params = dict()
    params["x0"] = weights
    if opt is msgd or opt is smd:
        params["fandprime"] = score_grad_xe
        params["nos"] = inputs.shape[0]
        params["args"] = {}
        params["batch_args"] = {"inputs": inputs, "targets": targets}
        params["epochs"] = epochs
        params["btsz"] = btsz
        # msgd
        params["beta"] = beta
        params["lr"] = lr 
        # smd
        params["eta0"] = eta0
        params["mu"] = mu
        params["lmbd"] = lmbd
        params["verbose"] = True
    else:
        # opt from scipy
        params["func"] = score_xe
        params["fprime"] = grad_xe
        params["args"] = (inputs, targets)
        params["maxfun"] = epochs
        params["m"] = 50
    weights = opt(**params)[0]
    print "Training done."
    #
    print "Test set preformance:",\
            zero_one(predict(weights, test_in), test_tar)
    return weights
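A minimal usage sketch (not part of the source; it assumes the rest of the module, i.e. score_xe, grad_xe and predict, is importable). The else-branch's func/fprime/args/maxfun/m keys and the [0] indexing on the result match scipy.optimize.fmin_l_bfgs_b, so a scipy optimizer can be passed in directly:

from scipy.optimize import fmin_l_bfgs_b
from opt import msgd

# stochastic training with momentum SGD
w_sgd = demo_mnist(msgd, epochs=10, btsz=100, lr=0.1, beta=0.9)
# batch training with L-BFGS
w_lbfgs = demo_mnist(fmin_l_bfgs_b, epochs=50)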
Example #2
def demo_mnist(hiddens, opt, l2=1e-6, epochs=10,
               lr=1e-4, beta=0., btsz=128, eta0=0.0005,
               mu=0.02, lmbd=0.99, weightvar=0.01,
               w=None):
    """
    """
    from misc import sigmoid, load_mnist
    from losses import xe, zero_one
    from opt import msgd, smd 
    #
    trainset, valset, testset = load_mnist()
    inputs, targets = trainset
    test_in, test_tar = testset
    di = inputs.shape[1]
    dt = np.max(targets) + 1
    structure = {}
    structure["hdim"] = hiddens
    structure["odim"] = dt
    structure["af"] = np.tanh
    structure["score"] = xe
    structure["l2"] = l2
    # initialize weights
    if w is None:
        weights = np.zeros(di*hiddens + hiddens + hiddens*dt + dt)
        weights[:hiddens*di] = 0.001 * np.random.randn(di*hiddens)
        weights[hiddens*(di+1):-dt] = 0.001 * np.random.randn(hiddens*dt)
        if opt is smd:
            # smd needs complex-valued weights
            weights = np.asarray(weights, dtype=complex)
    else:
        print "Continue with provided weights w."
        weights = w
    #
    print "Training starts..."
    params = dict()
    params["x0"] = weights
    params["fandprime"] = score_grad
    if opt is msgd or opt is smd:
        params["nos"] = inputs.shape[0]
        params["args"] = {"structure": structure}
        params["batch_args"] = {"inputs": inputs, "targets": targets}
        params["epochs"] = epochs
        params["btsz"] = btsz
        params["verbose"] = True
        # for msgd
        params["lr"] = lr
        params["beta"] = beta
        # for smd
        params["eta0"] = eta0
        params["mu"] = mu
        params["lmbd"] = lmbd
    else:
        params["args"] = (structure, inputs, targets)
        params["maxfun"] = epochs
        # for lbfgs
        params["m"] = 25
    
    weights = opt(**params)[0]
    print "Training done."
    
    # Evaluate on test set
    test_perf = zero_one(predict(weights, structure, test_in), test_tar)
    print "Test set performance:", test_perf
    return weights
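The flat parameter vector packs both layers end to end. A hypothetical helper (not in the source; the layout is inferred from the initialization slices above) showing how the vector splits into layer parameters:

import numpy as np

def unpack(weights, di, hiddens, dt):
    # layout: [W1 | b1 | W2 | b2], matching the slices used above
    W1 = weights[:di*hiddens].reshape(di, hiddens)
    b1 = weights[di*hiddens:hiddens*(di + 1)]
    W2 = weights[hiddens*(di + 1):-dt].reshape(hiddens, dt)
    b2 = weights[-dt:]
    return W1, b1, W2, b2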
Example #3
def train(args):
    """Train a DCGAN-style generator/discriminator pair on the dataset
    selected by args.dataset."""
    if args.dataset.lower() == 'celeba':
        train_loader, _, _ = misc.load_celebA(args.batch_s, args.img_s)
        img_c = 3
    elif args.dataset.lower() == 'lsun':
        train_loader, val_loader, _ = misc.load_LSUN(args.batch_s, args.img_s)
        img_c = 3
    elif args.dataset.lower() == 'imagenet':
        train_loader, val_loader, _ = misc.load_imagenet(
            args.batch_s, args.img_s)
        img_c = 3
    elif args.dataset.lower() == 'mnist':
        train_loader, val_loader, _ = misc.load_mnist(args.batch_s, args.img_s)
        img_c = 1
    else:
        raise NotImplementedError

    fm_gen = [
        args.base_fm_n * pow(2, args.layer_n - 1 - l)
        for l in range(args.layer_n)
    ]
    fm_disc = [args.base_fm_n * pow(2, l) for l in range(args.layer_n)]
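    # e.g. with base_fm_n=64 and layer_n=4: fm_gen = [512, 256, 128, 64]
    # (coarse to fine) and fm_disc = [64, 128, 256, 512] (fine to coarse)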

    gen = model.Generator(args.z_dim, img_c, fm_gen).cuda()
    gen.apply(model.init_weights)
    disc = model.Discriminator(img_c, fm_disc).cuda()
    disc.apply(model.init_weights)

    criterion = nn.BCELoss()
    # use float labels: BCELoss expects floating-point targets
    label_real = 1.
    label_fake = 0.

    optim_gen = optim.Adam(gen.parameters(),
                           lr=args.learning_rate,
                           betas=(args.beta1, 0.999))
    optim_disc = optim.Adam(disc.parameters(),
                            lr=args.learning_rate,
                            betas=(args.beta1, 0.999))

    start_epoch = 0
    if args.resume:
        filename = args.ckpt_dir + args.resume
        if os.path.isfile(filename):
            print("==> loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            start_epoch = checkpoint['epoch'] + 1
            gen.load_state_dict(checkpoint['state_dict_gen'])
            disc.load_state_dict(checkpoint['state_dict_disc'])
            optim_gen.load_state_dict(checkpoint['optimizer_gen'])
            optim_disc.load_state_dict(checkpoint['optimizer_disc'])
            print("==> loaded checkpoint '{}' (epoch {})".format(
                filename, checkpoint['epoch']))
        else:
            print("==> no checkpoint found at '{}'".format(filename))

    os.makedirs(args.img_dir, exist_ok=True)
    os.makedirs(args.ckpt_dir, exist_ok=True)

    #########################################
    #### Train
    ## 1. Update Discriminator: maximize log(D(x)) + log(1-D(G(z)))
    # 1-1. with real image x
    # 1-2. with fake image G(z)
    ## 2. Update Generator: maximize log(D(G(z)))
    for e in range(args.epochs):
        epoch = start_epoch + e
        loss_meter_gen = AverageMeter()
        loss_meter_disc = AverageMeter()
        out_meter_disc_f = AverageMeter()
        out_meter_disc_r = AverageMeter()
        out_meter_disc_g = AverageMeter()
        for i, data in enumerate(train_loader):
            img_real, _ = data
            img_real = img_real.cuda()
            batch_s = img_real.size(0)

            optim_disc.zero_grad()

            # 1-1. with real image x
            label_r = torch.full((batch_s, 1), label_real).cuda()
            out_disc_r = disc(img_real).view(batch_s, -1)
            error_disc_r = criterion(out_disc_r, label_r)
            error_disc_r.backward()

            # 1-2. with fake image G(z)
            img_fake = gen(torch.randn(batch_s, args.z_dim, 1, 1).cuda())
            label_f = torch.full((batch_s, 1), label_fake).cuda()
            out_disc_f = disc(img_fake.detach()).view(batch_s, -1)
            error_disc_f = criterion(out_disc_f, label_f)

            # track the combined loss for logging; the real-batch gradient
            # is already accumulated, so only the fake-batch loss needs
            # backward() here
            error_disc = error_disc_r + error_disc_f
            error_disc_f.backward()
            optim_disc.step()

            # 2. Update Generator: three steps per discriminator step,
            #    labelling fakes as real to maximize log(D(G(z)))
            for g_iter in range(3):
                img_fake = gen(torch.randn(batch_s, args.z_dim, 1, 1).cuda())
                out_disc_g = disc(img_fake).view(batch_s, -1)
                error_gen = criterion(out_disc_g, label_r)
                optim_gen.zero_grad()
                error_gen.backward()
                optim_gen.step()

            loss_meter_gen.update(error_gen.item(), batch_s)
            loss_meter_disc.update(error_disc.item(), batch_s)
            out_meter_disc_f.update(torch.sum(out_disc_f).item(), batch_s)
            out_meter_disc_r.update(torch.sum(out_disc_r).item(), batch_s)
            out_meter_disc_g.update(torch.sum(out_disc_g).item(), batch_s)

            if i % args.log_term == 0:
                print(
                    'epoch: %d, batch: %d \t Loss(D/G): %.4f / %.4f \t D(R/F/G): %.4f / %.4f / %.4f'
                    % (epoch, i, loss_meter_disc.avg, loss_meter_gen.avg,
                       out_meter_disc_r.avg / batch_s, out_meter_disc_f.avg /
                       batch_s, out_meter_disc_g.avg / batch_s))
                with open('save_log.txt', 'a') as fd:
                    fd.write(
                        'epoch: %d, batch: %d \t Loss(D/G): /%.4f / %.4f/ || D(R/F/G): /%.4f / %.4f / %.4f/ \n'
                        % (epoch, i, loss_meter_disc.avg, loss_meter_gen.avg,
                           out_meter_disc_r.avg, out_meter_disc_f.avg,
                           out_meter_disc_g.avg))
                misc.plot_samples_from_images(
                    img_fake, batch_s, args.img_dir,
                    'img_e{}b{}.jpg'.format(epoch, i))

                torch.save(
                    {
                        'epoch': epoch,
                        'state_dict_gen': gen.state_dict(),
                        'state_dict_disc': disc.state_dict(),
                        'optimizer_gen': optim_gen.state_dict(),
                        'optimizer_disc': optim_disc.state_dict()
                    }, args.ckpt_dir + 'checkpoint_e{}b{}.pt'.format(epoch, i))

                loss_meter_gen = AverageMeter()
                loss_meter_disc = AverageMeter()
                out_meter_disc_f = AverageMeter()
                out_meter_disc_r = AverageMeter()
                out_meter_disc_g = AverageMeter()
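train() leans on an AverageMeter helper and an argparse-style args namespace, neither of which appears in the snippet. A minimal sketch of both, assuming the common AverageMeter pattern and listing only the attributes train() actually reads (all default values are illustrative, not from the source):

import argparse

class AverageMeter(object):
    """Running average of a scalar (a common pattern; the source's own
    implementation may differ)."""
    def __init__(self):
        self.sum, self.count, self.avg = 0., 0, 0.

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='mnist')
    parser.add_argument('--batch_s', type=int, default=128)
    parser.add_argument('--img_s', type=int, default=64)
    parser.add_argument('--z_dim', type=int, default=100)
    parser.add_argument('--base_fm_n', type=int, default=64)
    parser.add_argument('--layer_n', type=int, default=4)
    parser.add_argument('--learning_rate', type=float, default=2e-4)
    parser.add_argument('--beta1', type=float, default=0.5)
    parser.add_argument('--epochs', type=int, default=25)
    parser.add_argument('--log_term', type=int, default=100)
    parser.add_argument('--resume', default='')
    parser.add_argument('--img_dir', default='./imgs/')
    parser.add_argument('--ckpt_dir', default='./ckpts/')
    train(parser.parse_args())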