def __init__(self, name, nPass, nPassD, nGenerators, batchSize, metric="default"):
    """Task configuration for the 64x64 LSUN-bedrooms experiment.

    Wires in the 64-pixel generator/discriminator builders and loads the
    dataset (a 128-pixel variant exists in models_uncond, per the original
    notes).
    """
    super().__init__(name, nPass, nPassD, nGenerators, batchSize, metric)
    self.imagesize = 64
    self.generator_builder = models_uncond.build_generator_64
    self.discriminator_builder = models_uncond.build_discriminator_64
    data = bedrooms(self.imagesize)
    self.dataset = data
    self.X_train = shuffle(data)
    # NOTE(review): the test split is drawn from a re-shuffle of the same
    # pool as the training data, so the two may overlap — confirm intended.
    self.X_test = shuffle(data)[0:1024]
def load_data_file(df_name, va_count=2000):
    """Load an entire data file into memory and split it train/validation.

    The first ``va_count`` rows (by file position) become the validation
    set; the remainder is the training set. Both splits are shuffled and
    the training mean is computed.

    Returns a ``(train, validation, train_mean)`` tuple.
    """
    print('loading data from {}...'.format(df_name))
    scaled, x_std = load_and_scale_data(df_name)
    # Split by placement within the data file.
    valid = scaled[:va_count, :]
    train = scaled[va_count:, :]
    # Shuffle training first, then validation (order kept so any shared
    # RNG stream matches the original implementation exactly).
    train = shuffle(train)
    valid = shuffle(valid)
    mean = np.mean(train, axis=0)
    print('done.')
    return train, valid, mean
def mnist_with_valid_set():
    """Load MNIST and carve a 10k validation split off the shuffled train set.

    Returns ``(trX, vaX, teX, trY, vaY, teY)`` with 50k training samples
    and the remaining shuffled training samples as validation.
    """
    train_x, test_x, train_y, test_y = mnist()
    train_x, train_y = shuffle(train_x, train_y)
    valid_x, valid_y = train_x[50000:], train_y[50000:]
    train_x, train_y = train_x[:50000], train_y[:50000]
    return train_x, valid_x, test_x, train_y, valid_y, test_y
def svhn_with_valid_set(extra=False):
    """Load SVHN with a 10k validation split carved off the training set.

    With ``extra=True`` the 'extra' subset is appended to the training
    data after the validation split, and a 0/1 source flag per training
    sample (1 = core train, 0 = extra) is returned as a seventh element.
    """
    if extra:
        train_x, extra_x, test_x, train_y, extra_y, test_y = svhn(extra=extra)
    else:
        train_x, test_x, train_y, test_y = svhn(extra=extra)
    train_x, train_y = shuffle(train_x, train_y)
    valid_x, valid_y = train_x[:10000], train_y[:10000]
    train_x, train_y = train_x[10000:], train_y[10000:]
    if not extra:
        return train_x, valid_x, test_x, train_y, valid_y, test_y
    # Tag each sample with its origin before merging, then reshuffle.
    source_flags = np.asarray([1] * len(train_y) + [0] * len(extra_y))
    train_x = np.concatenate([train_x, extra_x], axis=0)
    train_y = np.concatenate([train_y, extra_y], axis=0)
    train_x, train_y, source_flags = shuffle(train_x, train_y, source_flags)
    return train_x, valid_x, test_x, train_y, valid_y, test_y, source_flags
def iter_data_discriminator(self, xreals, instances):
    """Yield (real, fake) minibatch pairs for discriminator training.

    The fake pool is built from the first ``miniBatchForD`` images of
    every instance, then shuffled before being batched alongside the
    real samples.
    """
    # One concatenation of all candidates' leading image slices; this is
    # value-identical to the original chained np.append(axis=0) calls.
    fake_pool = np.concatenate(
        [inst.img[0:self.miniBatchForD, :, :, :] for inst in instances],
        axis=0)
    for xreal, xfake in iter_data(xreals, shuffle(fake_pool), size=self.batchSize):
        yield xreal, xfake
def cal_margin():
    """Estimate the mean marginal score of the test set.

    Splits the shuffled test set into 1000 equal chunks, accumulates
    ``_marginal`` weighted by chunk size, and normalizes by ``ntest``.
    Relies on module-level ``X_test``, ``ntest``, ``_marginal``,
    ``floatX`` and ``shuffle``.
    """
    total = 0
    shuffled = shuffle(X_test)
    n_chunks = 1000
    chunk = ntest // n_chunks
    for j in range(n_chunks):
        # Indices wrap modulo ntest, matching the original batching.
        idxs = [t % ntest for t in range(j * chunk, (j + 1) * chunk)]
        xs = floatX(shuffled[idxs])
        total += _marginal(xs) * len(idxs)
    return total / ntest
def __init__(self, name, nPass, nPassD, nGenerators, batchSize, metric="default"):
    """Task configuration for the 32x32 CIFAR-10 experiment.

    Wires in the 32-pixel generator/discriminator builders; training
    images are shuffled once at load time, the test split is kept as-is.
    """
    super().__init__(name, nPass, nPassD, nGenerators, batchSize, metric)
    self.imagesize = 32
    self.generator_builder = models_uncond.build_generator_32
    self.discriminator_builder = models_uncond.build_discriminator_32
    data = cifar10()
    self.dataset = data
    self.X_train = shuffle(data["X_train"])
    self.X_test = data["X_test"]
frontal_face = _face_rotator(eigen_code.reshape(1, -1, 1, 1), frontal_code.reshape(1, -1, 1, 1)) frontal_face = inverse_transform(frontal_face).reshape(npx, npx) imsave(frontal_path + image_names[image_idx], frontal_face) if phase == 'TEST': #generate_rotated_multipie_setting1() test(10000) if phase == 'TRAIN': log = open('logs/log.txt', 'w') log.close() for epoch in range(1, niter + niter_decay + 1): print 'epoch', epoch trY_A, trX_B, trY_B, trX_A = one_epoch_traning_data() trY_A, trX_B, trY_B, trX_A = shuffle(trY_A, trX_B, trY_B, trX_A) mean_vars_array = [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []] for ymb_A, xmb_B, ymb_B, xmb_A in tqdm(iter_data(trY_A, trX_B, trY_B, trX_A, size=nbatch), total=len(trY_A) / nbatch): ymb_A = transform(ymb_A) xmb_B = transform(xmb_B) ymb_B = transform(ymb_B) xmb_A = transform(xmb_A) if n_updates % (k + 1) == 0: output_g = _train_g(ymb_A, xmb_B, ymb_B, xmb_A) else:
def run(hp, folder):
    """Train a DCGAN-style generator/discriminator pair with Theano.

    hp: dict of hyperparameters (k, l2, nbatch, nz, ngfc, ndfc, ngf, ndf,
        niter, niter_decay, lr, scale, optional budget_hours).
    folder: output directory; samples and parameter dumps are written here.

    Python 2 code (uses `print` statements). Images are assumed to be
    28x28 single-channel (npx=28, nc=1) — presumably MNIST; confirm
    against load_data().
    """
    trX, trY, nb_classes = load_data()
    # Default hyperparameters (immediately overridden from `hp` below;
    # kept for reference).
    k = 1              # # of discrim updates for each gen update
    l2 = 2.5e-5        # l2 weight decay
    b1 = 0.5           # momentum term of adam
    nc = 1             # # of channels in image
    ny = nb_classes    # # of classes
    nbatch = 128       # # of examples in batch
    npx = 28           # # of pixels width/height of images
    nz = 100           # # of dim for Z
    ngfc = 512         # # of gen units for fully connected layers
    ndfc = 512         # # of discrim units for fully connected layers
    ngf = 64           # # of gen filters in first conv layer
    ndf = 64           # # of discrim filters in first conv layer
    nx = npx*npx*nc    # # of dimensions in X
    niter = 200        # # of iter at starting learning rate
    niter_decay = 100  # # of iter to linearly decay learning rate to zero
    lr = 0.0002        # initial learning rate for adam
    scale = 0.02
    # Hyperparameter overrides from the search dictionary.
    k = hp['k']
    l2 = hp['l2']
    #b1 = hp['b1']
    nc = 1
    ny = nb_classes
    nbatch = hp['nbatch']
    npx = 28
    nz = hp['nz']
    ngfc = hp['ngfc']          # # of gen units for fully connected layers
    ndfc = hp['ndfc']          # # of discrim units for fully connected layers
    ngf = hp['ngf']            # # of gen filters in first conv layer
    ndf = hp['ndf']            # # of discrim filters in first conv layer
    nx = npx*npx*nc            # # of dimensions in X
    niter = hp['niter']        # # of iter at starting learning rate
    niter_decay = hp['niter_decay']  # # of iter to linearly decay learning rate to zero
    lr = hp['lr']              # initial learning rate for adam
    scale = hp['scale']
    #k = 1          # # of discrim updates for each gen update
    #l2 = 2.5e-5    # l2 weight decay
    b1 = 0.5        # momentum term of adam
    #nc = 1         # # of channels in image
    #ny = nb_classes  # # of classes
    # Wall-clock training budget; the epoch loop breaks once exceeded.
    budget_hours = hp.get('budget_hours', 2)
    budget_secs = budget_hours * 3600
    ntrain = len(trX)

    def transform(X):
        # Flat batch -> (batch, channels, height, width) float tensor.
        return (floatX(X)).reshape(-1, nc, npx, npx)

    def inverse_transform(X):
        # Back to (batch, height, width) for visualization.
        X = X.reshape(-1, npx, npx)
        return X

    model_dir = folder
    samples_dir = os.path.join(model_dir, 'samples')
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(samples_dir):
        os.makedirs(samples_dir)

    relu = activations.Rectify()
    sigmoid = activations.Sigmoid()
    lrelu = activations.LeakyRectify()
    bce = T.nnet.binary_crossentropy
    # Normal(scale) weight initializers for generator / discriminator.
    gifn = inits.Normal(scale=scale)
    difn = inits.Normal(scale=scale)
    # Generator weights: fc -> fc -> deconv -> deconv (to nc channels).
    gw = gifn((nz, ngfc), 'gw')
    gw2 = gifn((ngfc, ngf*2*7*7), 'gw2')
    gw3 = gifn((ngf*2, ngf, 5, 5), 'gw3')
    gwx = gifn((ngf, nc, 5, 5), 'gwx')
    # Discriminator weights: conv -> conv -> fc -> fc (to 1 logit).
    dw = difn((ndf, nc, 5, 5), 'dw')
    dw2 = difn((ndf*2, ndf, 5, 5), 'dw2')
    dw3 = difn((ndf*2*7*7, ndfc), 'dw3')
    dwy = difn((ndfc, 1), 'dwy')
    gen_params = [gw, gw2, gw3, gwx]
    discrim_params = [dw, dw2, dw3, dwy]

    def gen(Z, w, w2, w3, wx, use_batchnorm=True):
        """Symbolic generator: z -> sigmoid image in [0, 1]."""
        if use_batchnorm:
            batchnorm_ = batchnorm
        else:
            batchnorm_ = lambda x: x
        h = relu(batchnorm_(T.dot(Z, w)))
        h2 = relu(batchnorm_(T.dot(h, w2)))
        h2 = h2.reshape((h2.shape[0], ngf*2, 7, 7))
        h3 = relu(batchnorm_(deconv(h2, w3, subsample=(2, 2), border_mode=(2, 2))))
        x = sigmoid(deconv(h3, wx, subsample=(2, 2), border_mode=(2, 2)))
        return x

    def discrim(X, w, w2, w3, wy):
        """Symbolic discriminator: image -> probability of being real."""
        h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2)))
        h2 = lrelu(batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2))))
        h2 = T.flatten(h2, 2)
        h3 = lrelu(batchnorm(T.dot(h2, w3)))
        y = sigmoid(T.dot(h3, wy))
        return y

    X = T.tensor4()
    Z = T.matrix()
    gX = gen(Z, *gen_params)
    p_real = discrim(X, *discrim_params)
    p_gen = discrim(gX, *discrim_params)
    # Standard non-saturating GAN losses (BCE against all-ones/all-zeros).
    d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
    d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
    g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()
    d_cost = d_cost_real + d_cost_gen
    g_cost = g_cost_d
    cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]
    lrt = sharedX(lr)
    d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    d_updates = d_updater(discrim_params, d_cost)
    g_updates = g_updater(gen_params, g_cost)
    #updates = d_updates + g_updates

    print 'COMPILING'
    t = time()
    _train_g = theano.function([X, Z], cost, updates=g_updates)
    _train_d = theano.function([X, Z], cost, updates=d_updates)
    _gen = theano.function([Z], gX)
    print '%.2f seconds to compile theano functions'%(time()-t)

    tr_idxs = np.arange(len(trX))
    # Fixed noise batch so per-epoch sample grids are comparable.
    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))

    def gen_samples(n, nbatch=128):
        # Generate n samples in nbatch-sized chunks plus one remainder chunk.
        samples = []
        labels = []
        n_gen = 0
        for i in range(n/nbatch):
            zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
            xmb = _gen(zmb)
            samples.append(xmb)
            n_gen += len(xmb)
        n_left = n-n_gen
        zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
        xmb = _gen(zmb)
        samples.append(xmb)
        return np.concatenate(samples, axis=0)

    # Fixed noise pool used for the periodic .npz sample dumps.
    s = floatX(np_rng.uniform(-1., 1., size=(10000, nz)))
    n_updates = 0
    n_check = 0
    n_epochs = 0
    n_updates = 0
    n_examples = 0
    t = time()
    begin = datetime.now()
    for epoch in range(1, niter+niter_decay+1):
        t = time()
        print("Epoch {}".format(epoch))
        trX = shuffle(trX)
        for imb in tqdm(iter_data(trX, size=nbatch), total=ntrain/nbatch):
            imb = transform(imb)
            zmb = floatX(np_rng.uniform(-1., 1., size=(len(imb), nz)))
            # Alternate: 1 generator update per k discriminator updates.
            if n_updates % (k+1) == 0:
                cost = _train_g(imb, zmb)
            else:
                cost = _train_d(imb, zmb)
            n_updates += 1
            n_examples += len(imb)
        samples = np.asarray(_gen(sample_zmb))
        grayscale_grid_vis(inverse_transform(samples), (10, 20), '{}/{:05d}.png'.format(samples_dir, n_epochs))
        n_epochs += 1
        # Linear learning-rate decay after the initial niter epochs.
        if n_epochs > niter:
            lrt.set_value(floatX(lrt.get_value() - lr/niter_decay))
        # Periodic checkpoint: generated samples + both parameter sets.
        if n_epochs % 50 == 0 or epoch == niter + niter_decay or epoch == 1:
            imgs = []
            for i in range(0, s.shape[0], nbatch):
                imgs.append(_gen(s[i:i+nbatch]))
            img = np.concatenate(imgs, axis=0)
            samples_filename = '{}/{:05d}_gen.npz'.format(model_dir, n_epochs)
            joblib.dump(img, samples_filename, compress=9)
            shutil.copy(samples_filename, '{}/gen.npz'.format(model_dir))
            joblib.dump([p.get_value() for p in gen_params], '{}/d_gen_params.jl'.format(model_dir, n_epochs), compress=9)
            joblib.dump([p.get_value() for p in discrim_params], '{}/discrim_params.jl'.format(model_dir, n_epochs), compress=9)
        print('Elapsed : {}sec'.format(time() - t))
        # Stop once the wall-clock budget is exhausted.
        if (datetime.now() - begin).total_seconds() >= budget_secs:
            print("Budget finished.quit.")
            break
'n_seconds', 'g_cost', 'd_cost', ] print desc.upper() n_updates = 0 n_check = 0 n_epochs = 0 n_updates = 0 n_examples = 0 t = time() sample_z0mb = rand_gen(size=(200, nz0)) # noise samples for top generator module for epoch in range(1, niter + niter_decay + 1): trX = shuffle(trX) for imb in tqdm(iter_data(trX, size=nbatch), total=ntrain / nbatch): imb = transform(imb) z0mb = rand_gen(size=(len(imb), nz0)) if n_updates % (k + 1) == 0: cost = _train_g(imb, z0mb) else: cost = _train_d(imb, z0mb) n_updates += 1 n_examples += len(imb) samples = np.asarray(_gen(sample_z0mb)) grayscale_grid_vis(inverse_transform(samples), (10, 20), "{}/{}.png".format(sample_dir, n_epochs)) n_epochs += 1 if n_epochs > niter: lrt.set_value(floatX(lrt.get_value() - lr / niter_decay))
def main():
    """Run the 8-Gaussians MOEGAN toy experiment.

    Maintains a population of ``ncandi`` generators. Each step every
    candidate is trained under every loss in ``loss_type``; the offspring
    are reduced back to ``ncandi`` either by NSGA-II (multi-objective on
    (fq, -fd)) or by greedy fitness replacement. The shared discriminator
    is then trained on real vs. pooled fake samples.

    Fix vs. original: the final ``np.savez`` checkpoint calls were swapped —
    generator parameters were written to ``dis_*.npz`` and discriminator
    parameters to ``gen_*.npz``. They now go to the matching files.
    """
    # Parameters
    task = 'toy'
    name = '8G_MOEGAN_MMDu2'  #'8G_MOEGAN_PFq_NFd_t2'
    DIM = 512
    begin_save = 0
    loss_type = ['trickLogD', 'minimax', 'ls']  #['trickLogD', 'minimax', 'ls']
    nloss = 3  #2
    DATASET = '8gaussians'
    batchSize = 64
    ncandi = 8
    kD = 1              # # of discrim updates for each gen update
    kG = 1              # # of discrim updates for each gen update
    ntf = 256
    b1 = 0.5            # momentum term of adam
    nz = 2              # # of dim for Z
    niter = 4           # # of iter at starting learning rate
    lr = 0.0001         # initial learning rate for adam G
    lrd = 0.0001        # initial learning rate for adam D
    N_up = 100000
    save_freq = 10000 / 10
    show_freq = 10000 / 10
    test_deterministic = True
    beta = 1.
    GP_norm = False     # if use gradients penalty on discriminator
    LAMBDA = 2.         # hyperparameter of GP
    NSGA2 = True

    # Load the dataset
    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM, GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()
    # Gradients penalty norm (WGAN-GP-style penalty on interpolated points)
    if GP_norm:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(), wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)
        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(D_loss, discriminator_params,
                                     learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))
    # Fd score: log of the squared gradient norm of the discriminator loss,
    # used as the diversity objective for the population.
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs], discriminator_loss,
                              updates=updates_d)
    # Compile another function generating some data
    dis_fn = theano.function([real_imgs, fake_imgs],
                             [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs], [
        real_out.mean(), fake_out.mean(),
        (real_out > 0.5).mean(), (fake_out > 0.5).mean(),
        Fd_score
    ])

    # Finally, launch the training loop.
    print("Starting training...")
    desc = task + '_' + name
    print(desc)
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson' % desc, 'wb')
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/' + desc):
        os.mkdir(os.path.join('models/', desc))
    # NOTE(review): 'front/' is written to below but never created here —
    # presumably it must exist beforehand; confirm.

    gen_new_params = []
    # We iterate over epochs:
    for n_updates in range(N_up):
        xmb = toy_dataset(DATASET=DATASET, size=batchSize * kD)
        xmb = xmb[0:batchSize * kD]
        # initial G cluster
        if n_updates == 0:
            for can_i in range(0, ncandi):
                train_g, gen_fn, generator = create_G(
                    loss_type=loss_type[can_i % nloss],
                    discriminator=discriminator, lr=lr, b1=b1, DIM=DIM)
                for _ in range(0, kG):
                    zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                    cost = train_g(zmb)
                sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                gen_imgs = gen_fn(sample_zmb)
                gen_new_params.append(
                    lasagne.layers.get_all_param_values(generator))
                if can_i == 0:
                    g_imgs_old = gen_imgs
                    fmb = gen_imgs[0:int(batchSize / ncandi * kD), :]
                else:
                    g_imgs_old = np.append(g_imgs_old, gen_imgs, axis=0)
                    newfmb = gen_imgs[0:int(batchSize / ncandi * kD), :]
                    fmb = np.append(fmb, newfmb, axis=0)
            # MODEL G: one shared symbolic generator; candidate weights are
            # swapped in/out via set_all_param_values.
            noise = T.matrix('noise')
            generator = models_uncond.build_generator_toy(noise, nd=DIM)
            Tgimgs = lasagne.layers.get_output(generator)
            Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)
            g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
            g_loss_minimax = - \
                lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
            g_loss_ls = T.mean(T.sqr((Tfake_out - 1)))
            g_params = lasagne.layers.get_all_params(generator, trainable=True)
            up_g_logD = lasagne.updates.adam(g_loss_logD, g_params,
                                             learning_rate=lrt, beta1=b1)
            up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params,
                                                learning_rate=lrt, beta1=b1)
            up_g_ls = lasagne.updates.adam(g_loss_ls, g_params,
                                           learning_rate=lrt, beta1=b1)
            train_g = theano.function([noise], g_loss_logD, updates=up_g_logD)
            train_g_minimax = theano.function([noise], g_loss_minimax,
                                              updates=up_g_minimax)
            train_g_ls = theano.function([noise], g_loss_ls, updates=up_g_ls)
            gen_fn = theano.function([noise], lasagne.layers.get_output(
                generator, deterministic=True))
        else:
            class Instance:
                """One candidate offspring: scores, weights, sample images."""
                def __init__(self, fq, fd, params, img_values, image_copy):
                    self.fq = fq          # quality score (mean D output on fakes)
                    self.fd = fd          # diversity score (Fd gradient-norm term)
                    self.params = params
                    self.vimg = img_values
                    self.cimg = image_copy

                def f(self):
                    # Scalar fitness used by the greedy (non-NSGA2) selection.
                    return self.fq - self.fd

            instances = []
            fq_list = np.zeros(ncandi)
            fd_list = np.zeros(ncandi)
            gen_old_params = gen_new_params
            # Breed: every surviving candidate x every loss variant.
            for can_i in range(0, ncandi):
                for type_i in range(0, nloss):
                    lasagne.layers.set_all_param_values(
                        generator, gen_old_params[can_i])
                    if loss_type[type_i] == 'trickLogD':
                        for _ in range(0, kG):
                            zmb = floatX(
                                np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g(zmb)
                    elif loss_type[type_i] == 'minimax':
                        for _ in range(0, kG):
                            zmb = floatX(
                                np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_minimax(zmb)
                    elif loss_type[type_i] == 'ls':
                        for _ in range(0, kG):
                            zmb = floatX(
                                np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_ls(zmb)
                    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                    gen_imgs = gen_fn(sample_zmb)
                    frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                    instances.append(
                        Instance(
                            frr_score, fd_score,
                            lasagne.layers.get_all_param_values(generator),
                            gen_imgs,
                            gen_imgs[0:int(batchSize / ncandi * kD), :]))
            # Survivor selection back down to ncandi candidates.
            if ncandi < len(instances):
                if NSGA2:
                    # Multi-objective: minimize (fq, -fd) via NSGA-II.
                    cromos = {
                        idx: [float(inst.fq), -float(inst.fd)]
                        for idx, inst in enumerate(instances)
                    }
                    cromos_idxs = [idx for idx, _ in enumerate(instances)]
                    finalpop = nsga_2_pass(ncandi, cromos, cromos_idxs)
                    for idx, p in enumerate(finalpop):
                        inst = instances[p]
                        gen_new_params[idx] = inst.params
                        fq_list[idx] = inst.fq
                        fd_list[idx] = inst.fd
                        fake_rate[idx] = inst.f()
                        g_imgs_old[idx * ntf:(idx + 1) * ntf, :] = inst.vimg
                        fmb[int(idx * batchSize / ncandi * kD):math.ceil((idx + 1) * batchSize / ncandi * kD), :] = inst.cimg
                    with open('front/%s.tsv' % desc, 'wb') as ffront:
                        for idx, p in enumerate(finalpop):
                            inst = instances[p]
                            ffront.write(
                                (str(inst.fq) + "\t" + str(inst.fd)).encode())
                            ffront.write("\n".encode())
                else:
                    # Greedy: keep first ncandi, then replace the worst when
                    # an offspring improves on it.
                    for idx, inst in enumerate(instances):
                        if idx < ncandi:
                            gen_new_params[idx] = inst.params
                            fake_rate[idx] = inst.f()
                            fq_list[idx] = inst.fq
                            fd_list[idx] = inst.fd
                            g_imgs_old[idx * ntf:(idx + 1) * ntf, :] = inst.vimg
                            fmb[int(idx * batchSize / ncandi * kD):math.ceil((idx + 1) * batchSize / ncandi * kD), :] = inst.cimg
                        else:
                            fr_com = fake_rate - inst.f()
                            if min(fr_com) < 0:
                                idr = np.where(fr_com == min(fr_com))[0][0]
                                gen_new_params[idr] = inst.params
                                fake_rate[idr] = inst.f()
                                g_imgs_old[idr * ntf:(idr + 1) * ntf, :] = inst.vimg
                                fmb[int(idr * batchSize / ncandi * kD):math.ceil((idr + 1) * batchSize / ncandi * kD), :] = inst.cimg

        # Score every surviving candidate against fresh real samples.
        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf)
        sample_xmb = sample_xmb[0:ncandi * ntf]
        for i in range(0, ncandi):
            xfake = g_imgs_old[i * ntf:(i + 1) * ntf, :]
            xreal = sample_xmb[i * ntf:(i + 1) * ntf, :]
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            if i == 0:
                fake_rate = np.array([fr])
                real_rate = np.array([tr])
                fake_rate_p = np.array([frp])
                real_rate_p = np.array([trp])
                FDL = np.array([fdscore])
            else:
                fake_rate = np.append(fake_rate, fr)
                real_rate = np.append(real_rate, tr)
                fake_rate_p = np.append(fake_rate_p, frp)
                real_rate_p = np.append(real_rate_p, trp)
                FDL = np.append(FDL, fdscore)
        print(fake_rate, fake_rate_p, FDL)
        print(n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write((str(fake_rate) + ' ' + str(fake_rate_p) + '\n' +
                     str(n_updates) + ' ' + str(real_rate.mean()) + ' ' +
                     str(real_rate_p.mean()) + '\n').encode())
        f_log.flush()

        # train D
        for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
            cost = train_d(xreal, xfake)

        if n_updates % show_freq == 0:
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            params_max = gen_new_params[np.argmax(fake_rate)]
            lasagne.layers.set_all_param_values(generator, params_max)
            g_imgs_max = gen_fn(s_zmb)
        if n_updates % show_freq == 0 and n_updates != 0:
            # metric: MMD^2 between each candidate's samples and real data.
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            xmb = toy_dataset(DATASET=DATASET, size=512)
            mmd2_all = []
            for i in range(0, ncandi):
                lasagne.layers.set_all_param_values(generator,
                                                    gen_new_params[i])
                g_imgs_min = gen_fn(s_zmb)
                mmd2_all.append(compute_metric_mmd2(g_imgs_min, xmb))
            mmd2_all = np.array(mmd2_all)
            if NSGA2:
                with open('front/%s_mmd2u.tsv' % desc, 'wb') as ffront:
                    for idx in range(0, ncandi):
                        ffront.write(
                            (str(fq_list[idx]) + "\t" + str(fd_list[idx]) +
                             "\t" + str(mmd2_all[idx])).encode())
                        ffront.write("\n".encode())
            # save best (lowest MMD^2) candidate's samples and checkpoints
            params = gen_new_params[np.argmin(mmd2_all)]
            lasagne.layers.set_all_param_values(generator, params)
            g_imgs_min = gen_fn(s_zmb)
            generate_image(xmb, g_imgs_min, n_updates / save_freq, desc,
                           postfix="_mmu2d")
            # BUGFIX: the two savez calls were swapped in the original —
            # generator params now go to gen_*.npz, discriminator to dis_*.npz.
            np.savez('models/%s/gen_%d.npz' % (desc, n_updates / save_freq),
                     *lasagne.layers.get_all_param_values(generator))
            np.savez('models/%s/dis_%d.npz' % (desc, n_updates / save_freq),
                     *lasagne.layers.get_all_param_values(discriminator))
total_svgd_ll = [] total_langevin_acc = [] total_langevin_ll = [] total_m_acc = [] total_m_ll = [] print "Start testing on separete data set" for data_i in range(0, 8): # For each dataset training on dev and testing on test dataset X_dev, y_dev = total_dev[data_i] X_test, y_test = total_test[data_i] dev_N = X_dev.shape[0] X_dev, y_dev = shuffle(X_dev, y_dev) X_dev, y_dev = shuffle(X_dev, y_dev) X_test, y_test = shuffle(X_test, y_test) dev_N = X_dev.shape[0] print "data size %d" %(dev_N) ### svgd x0 = init_theta(n_particle=n_particle) x0 = sharedX(x0) _svgd_step = _make_svgd_step(x0, lr=1e-6 * (2 ** 13)) for i in tqdm(range(n_iter)): imb = [t % dev_N for t in range(i*nbatch, (i+1)*nbatch)] _svgd_step(X_dev[imb], y_dev[imb], dev_N)
def main():
    """Run the 25-Gaussians E-GAN toy experiment (single-candidate variant).

    Python 2 code (`print` statements; integer division is relied on for
    slice bounds like ``batchSize/ncandi*kD``). With ncandi=1 a single
    generator is evolved: each step it is trained under each of the three
    losses and the best-scoring offspring (by fq - fd) is kept, then the
    shared discriminator is trained on real vs. pooled fake samples.
    """
    # Parameters
    task = 'toy'
    name = '25G'
    DIM = 512
    begin_save = 0
    loss_type = ['trickLogD', 'minimax', 'ls']
    nloss = 3
    DATASET = '25gaussians'
    batchSize = 64
    ncandi = 1
    kD = 1              # # of discrim updates for each gen update
    kG = 1              # # of discrim updates for each gen update
    ntf = 256
    b1 = 0.5            # momentum term of adam
    nz = 2              # # of dim for Z
    niter = 4           # # of iter at starting learning rate
    lr = 0.0001         # initial learning rate for adam G
    lrd = 0.0001        # initial learning rate for adam D
    N_up = 100000
    save_freq = 10000
    show_freq = 10000
    test_deterministic = True
    beta = 1.
    GP_norm = False     # if use gradients penalty on discriminator
    LAMBDA = 2.         # hyperparameter of GP

    # Load the dataset
    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM, GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (lasagne.objectives.binary_crossentropy(real_out, 1)
                          + lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()
    # Gradients penalty norm (WGAN-GP-style penalty on interpolated points)
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha*differences)
        gradients = theano.grad(lasagne.layers.get_output(discriminator, interpolates).sum(), wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes-1.)**2)
        D_loss = discriminator_loss + LAMBDA*gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(
        D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))
    # Fd Socre: log of squared gradient norm of the discriminator loss,
    # used as the diversity term in the offspring fitness below.
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta*T.log(sum(T.sum(T.sqr(x)) for x in Fd))
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs], discriminator_loss, updates=updates_d)
    # Compile another function generating some data
    dis_fn = theano.function([real_imgs, fake_imgs], [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs],
                               [real_out.mean(), fake_out.mean(),
                                (real_out > 0.5).mean(), (fake_out > 0.5).mean(),
                                Fd_score])

    # Finally, launch the training loop.
    print("Starting training...")
    desc = task + '_' + name
    print desc
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson' % desc, 'wb')
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/'+desc):
        os.mkdir(os.path.join('models/', desc))

    gen_new_params = []
    # We iterate over epochs:
    for n_updates in range(N_up):
        xmb = toy_dataset(DATASET=DATASET, size=batchSize*kD)
        xmb = xmb[0:batchSize*kD]
        # initial G cluster
        if n_updates == 0:
            for can_i in range(0, ncandi):
                train_g, gen_fn, generator = create_G(
                    loss_type=loss_type[can_i % nloss],
                    discriminator=discriminator, lr=lr, b1=b1, DIM=DIM)
                for _ in range(0, kG):
                    zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                    cost = train_g(zmb)
                sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                gen_imgs = gen_fn(sample_zmb)
                gen_new_params.append(lasagne.layers.get_all_param_values(generator))
                if can_i == 0:
                    g_imgs_old = gen_imgs
                    fmb = gen_imgs[0:batchSize/ncandi*kD, :]
                else:
                    g_imgs_old = np.append(g_imgs_old, gen_imgs, axis=0)
                    fmb = np.append(fmb, gen_imgs[0:batchSize/ncandi*kD, :], axis=0)
            #print gen_new_params
            # MODEL G: one shared symbolic generator; candidate weights are
            # swapped in/out via set_all_param_values.
            noise = T.matrix('noise')
            generator = models_uncond.build_generator_toy(noise, nd=DIM)
            Tgimgs = lasagne.layers.get_output(generator)
            Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)
            # Three alternative generator objectives.
            g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
            g_loss_minimax = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
            g_loss_ls = T.mean(T.sqr((Tfake_out - 1)))
            g_params = lasagne.layers.get_all_params(generator, trainable=True)
            up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1)
            up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1)
            up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1)
            train_g = theano.function([noise], g_loss_logD, updates=up_g_logD)
            train_g_minimax = theano.function([noise], g_loss_minimax, updates=up_g_minimax)
            train_g_ls = theano.function([noise], g_loss_ls, updates=up_g_ls)
            gen_fn = theano.function([noise], lasagne.layers.get_output(
                generator, deterministic=True))
        else:
            gen_old_params = gen_new_params
            # Breed: every surviving candidate x every loss variant.
            for can_i in range(0, ncandi):
                for type_i in range(0, nloss):
                    lasagne.layers.set_all_param_values(generator, gen_old_params[can_i])
                    if loss_type[type_i] == 'trickLogD':
                        for _ in range(0, kG):
                            zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g(zmb)
                    elif loss_type[type_i] == 'minimax':
                        for _ in range(0, kG):
                            zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_minimax(zmb)
                    elif loss_type[type_i] == 'ls':
                        for _ in range(0, kG):
                            zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_ls(zmb)
                    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                    gen_imgs = gen_fn(sample_zmb)
                    frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                    #frr = frr[0]
                    # Offspring fitness: quality minus diversity score.
                    frr = frr_score - fd_score
                    if can_i*nloss + type_i < ncandi:
                        # Fill the first ncandi slots unconditionally.
                        idx = can_i*nloss + type_i
                        gen_new_params[idx] = lasagne.layers.get_all_param_values(generator)
                        fake_rate[idx] = frr
                        g_imgs_old[idx*ntf:(idx+1)*ntf, :] = gen_imgs
                        fmb[idx*batchSize/ncandi*kD:(idx+1)*batchSize/ncandi*kD, :] = \
                            gen_imgs[0:batchSize/ncandi*kD, :]
                    else:
                        # Replace the worst current candidate if improved on.
                        fr_com = fake_rate - frr
                        if min(fr_com) < 0:
                            ids_replace = np.where(fr_com == min(fr_com))
                            idr = ids_replace[0][0]
                            fake_rate[idr] = frr
                            gen_new_params[idr] = lasagne.layers.get_all_param_values(generator)
                            g_imgs_old[idr*ntf:(idr+1)*ntf, :] = gen_imgs
                            fmb[idr*batchSize/ncandi*kD:(idr+1)*batchSize/ncandi*kD, :] = \
                                gen_imgs[0:batchSize/ncandi*kD, :]

        # Score every surviving candidate against fresh real samples.
        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi*ntf)
        sample_xmb = sample_xmb[0:ncandi*ntf]
        for i in range(0, ncandi):
            xfake = g_imgs_old[i*ntf:(i+1)*ntf, :]
            xreal = sample_xmb[i*ntf:(i+1)*ntf, :]
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            if i == 0:
                fake_rate = np.array([fr])
                real_rate = np.array([tr])
                fake_rate_p = np.array([frp])
                real_rate_p = np.array([trp])
                FDL = np.array([fdscore])
            else:
                fake_rate = np.append(fake_rate, fr)
                real_rate = np.append(real_rate, tr)
                fake_rate_p = np.append(fake_rate_p, frp)
                real_rate_p = np.append(real_rate_p, trp)
                FDL = np.append(FDL, fdscore)
        print fake_rate, fake_rate_p, FDL
        print (n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write(str(fake_rate)+' '+str(fake_rate_p)+'\n' +
                    str(n_updates) + ' ' + str(real_rate.mean()) + ' ' + str(real_rate_p.mean())+'\n')
        f_log.flush()

        # train D
        for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
            cost = train_d(xreal, xfake)

        # Periodically visualize the current generator against real data.
        if n_updates % show_freq == 0:
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            g_imgs = gen_fn(s_zmb)
            xmb = toy_dataset(DATASET=DATASET, size=512)
            generate_image(xmb, g_imgs, n_updates/save_freq, desc)
'n_seconds', 'g_cost', 'd_cost', ] print desc.upper() n_updates = 0 n_check = 0 n_epochs = 0 n_updates = 0 n_examples = 0 t = time() sample_z0mb = rand_gen(size=(200, nz0)) # noise samples for top generator module for epoch in range(1, niter + niter_decay + 1): Xtr = shuffle(Xtr) g_cost = 0 d_cost = 0 gc_iter = 0 dc_iter = 0 for imb in tqdm(iter_data(Xtr, size=nbatch), total=ntrain / nbatch): imb = train_transform(imb) z0mb = rand_gen(size=(len(imb), nz0)) if n_updates % (k + 1) == 0: g_cost += _train_g(imb, z0mb)[0] gc_iter += 1 else: d_cost += _train_d(imb, z0mb)[1] dc_iter += 1 n_updates += 1 n_examples += len(imb)
def main(
        problem,
        popsize,
        algorithm,
        save_freq,
        loss_type=['trickLogD', 'minimax', 'ls'],
        postfix=None,
        nPassD=1,  # backpropagation passes for the discriminator per G update
        batchSize=64,
        metric="default",
        output_dir="runs",
        gradients_penalty=False):
    """Train a population of GAN generators with evolutionary selection.

    Runs one of three algorithms against a single shared discriminator:
      - "egan":    single-objective selection (fitness fq - fd),
      - "moegan":  multi-objective NSGA-II selection over (quality, diversity),
      - "smoegan": "moegan" plus a deep-Q-learning driven variation operator.

    problem selects a task from the module-level `problem_table`; each task
    supplies the dataset batches, network builders and metric evaluation.
    Checkpoints, logs, images and Pareto fronts are written under
    `output_dir/<description_name>`.

    NOTE: mutable default argument `loss_type` is shared across calls; it is
    only read here, so this is benign, but callers should not mutate it.
    """
    if not (problem in problem_table.keys()):
        exit(-1)
    #task
    task_args = problem_table[problem][1]
    task = problem_table[problem][0](nPassD, popsize, batchSize, metric)
    net_otype = task.net_output_type()
    # description
    description_name = '{}_{}_{}_{}'.format(
        str(task),
        algorithm,
        popsize,
        postfix if postfix is not None else "",
    )
    # share params
    nloss = len(loss_type)
    lr = task_args['lr']  # initial learning rate for adam G
    lrd = task_args['lrd']  # initial learning rate for adam D
    b1 = task_args['b1']  # momentum term of adam
    beta = task_args['beta']  # scale factor of the diversity (Fd) score below
    samples = task_args['metric_samples']  # metric samples
    DIM = task_args['dim']  # base layer width/size passed to network builders
    GP_norm = gradients_penalty  # if use gradients penalty on discriminator
    LAMBDA = 2.  # hyperparameter sudof GP
    # algorithm params
    if algorithm == "egan":
        VARIATION = "all"
        MULTI_OBJECTIVE_SELECTION = False
    elif algorithm == "moegan":
        VARIATION = "all"
        MULTI_OBJECTIVE_SELECTION = True
    elif algorithm == "smoegan":
        VARIATION = "deepqlearning"
        MULTI_OBJECTIVE_SELECTION = True
    else:
        exit(-2)

    # Load the dataset
    def create_generator_trainer(noise=None, discriminator=None, lr=0.0002, b1=0.5, DIM=64):
        # Wraps a freshly built generator (from the task) in a GeneratorTrainer
        # bound to the shared discriminator.
        return GeneratorTrainer(noise, task.create_geneator(noise, DIM), discriminator, lr, b1)

    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = net_otype('real_imgs')
    fake_imgs = net_otype('fake_imgs')
    # Create neural network model
    discriminator = task.create_discriminator(DIM, GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions: standard non-saturating GAN discriminator loss
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()
    # Gradients penalty norm (WGAN-GP style penalty on interpolated samples)
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(), wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)
        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.
    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(
        D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d)
    #lrt = theano.shared(lasagne.utils.floatX(lr))
    # Fd Socre: log of the squared gradient norm of D's loss w.r.t. its params,
    # used as the diversity objective for each candidate generator.
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))
    # max is ~7.5 for toy dataset and ~0.025 for real ones (it will be updated
    # after 1 iteration, which is likely the worst one)
    Fd_auto_normalization = AutoNormalization(float(0.1))
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs], discriminator_loss, updates=updates_d)
    # Compile another function generating some data
    dis_fn = theano.function([real_imgs, fake_imgs], [fake_out.mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs], [
        real_out.mean(),
        fake_out.mean(),
        (real_out > 0.5).mean(),
        (fake_out > 0.5).mean(),
        Fd_score
    ])
    #main MODEL G
    noise = T.matrix('noise')
    generator_trainer = create_generator_trainer(noise, discriminator, lr, b1, DIM)
    # Finally, launch the training loop.
    print("Starting training...")
    print(description_name)
    #build dirs
    path_front, path_logs, path_models, path_models_last, path_images = build_output_dirs(
        output_dir, description_name)
    #define a problem instance
    instances = []
    instances_old = []

    #generator of a offspring: trains one candidate with the given loss and
    #appends the resulting Instance to the (closed-over) `instances` list.
    def generate_offsptring(xreal, loss_id, pop_id, inst=None):
        if inst == None:
            # no parent supplied: start from freshly initialized G parameters
            newparams = create_generator_trainer(noise=noise, discriminator=discriminator,
                                                 lr=lr, b1=b1, DIM=DIM).get()
            inst = Instance(-float("inf"), float("inf"), newparams, -1, pop_id, None)
        #init gen
        generator_trainer.set(inst.params)
        #train
        generator_trainer.train(loss_type[loss_id], task.noise_batch())
        #score
        xfake = generator_trainer.gen(task.noise_batch())
        frr_score, fd_score = dis_fn(xreal, xfake)
        #new instance
        new_instance = Instance(frr_score, fd_score, generator_trainer.get(), loss_id, pop_id, xfake)
        #save
        instances.append(new_instance)
        #info stuff
        return new_instance

    #init varation
    variation = get_varation(VARIATION)(popsize, nloss, generate_offsptring)

    #reval pop with new D: parents' scores become stale after D is updated,
    #so re-evaluate them against the current discriminator.
    def reval_pupulation(in_instances):
        #ret
        out_instances = []
        #generates new batches of images for each generator, and then eval these sets by means (new) D
        for inst in in_instances:
            generator_trainer.set(inst.params)
            xfake = generator_trainer.gen(task.noise_batch())
            frr_score, fd_score = dis_fn(xreal_eval, xfake)
            out_instances.append(
                Instance(frr_score,
                         fd_score,
                         generator_trainer.get(),
                         inst.loss_id,
                         inst.pop_id,
                         xfake,
                         im_parent=True))
        return out_instances

    #log stuff
    LOG_HEADER, LOG_TEMPLATE = build_log_template(popsize, nloss)
    log = Logger(os.path.join(path_logs, 'logs.tsv'), header=LOG_HEADER.encode())
    timer = Timer()
    losses_counter = [0] * nloss
    # We iterate over epochs:
    for n_updates in task.get_range():
        #get batch
        xmb = task.batch()
        #get eval batch
        if xmb.shape[0] == batchSize:
            xreal_eval = xmb
        else:
            xreal_eval = shuffle(xmb)[:batchSize]
        # initial G cluster
        if MULTI_OBJECTIVE_SELECTION:
            instances_old = reval_pupulation(instances)
        else:
            instances_old = instances
        #reset
        instances = []
        variation.update(instances_old, task.is_last())
        # on the very first update there are no parents yet (n_updates == 0)
        for pop_id in range(0, popsize):
            variation.gen(xreal_eval, instances_old[pop_id] if n_updates else None, pop_id)
        if popsize <= (len(instances) + len(instances_old)):
            if MULTI_OBJECTIVE_SELECTION == True:
                #add parents in the pool
                instances = [*instances_old, *instances]
                #from the orginal code, we have to maximize D(G(X)),
                #Since in NSGA2 performences a minimization,
                #We are going to minimize -D(G(X)),
                #also we want maximize the diversity score,
                #So, we are going to minimize -diversity score (also we wanna normalize that value)
                cromos = {
                    idx: [-float(inst.fq), -float(Fd_auto_normalization(inst.fd))]
                    for idx, inst in enumerate(instances)
                }  # S2
                cromos_idxs = [idx for idx, _ in enumerate(instances)]
                finalpop = nsga_2_pass(popsize, cromos, cromos_idxs)
                instances = [instances[p] for p in finalpop]
                with open(os.path.join(path_front, 'last.tsv'), 'wb') as ffront:
                    for inst in instances:
                        ffront.write(
                            (str(inst.fq) + "\t" + str(inst.fd)).encode())
                        ffront.write("\n".encode())
            elif nloss > 1:
                #sort new
                instances.sort(key=lambda inst: inst.f()
                               )  #(from the orginal code in github) maximize
                #cut best ones
                instances = instances[len(instances) - popsize:]
        # gather per-candidate discriminator statistics for logging
        for i in range(0, popsize):
            xreal, xfake = task.statistic_datas(instances[i].img)
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            fake_rate = np.array([fr]) if i == 0 else np.append(fake_rate, fr)
            real_rate = np.array([tr]) if i == 0 else np.append(real_rate, tr)
            fake_rate_p = np.array([frp]) if i == 0 else np.append(
                fake_rate_p, frp)
            real_rate_p = np.array([trp]) if i == 0 else np.append(
                real_rate_p, trp)
            FDL = np.array([fdscore]) if i == 0 else np.append(FDL, fdscore)
            losses_counter[instances[i].loss_id] += 1
        # train D
        for xreal, xfake in task.iter_data_discriminator(xmb, instances):
            train_d(xreal, xfake)
        #show it info
        print(n_updates, real_rate.mean(), real_rate_p.mean())
        #write logs
        log.writeln(
            LOG_TEMPLATE.format(n_updates, str(timer), fake_rate.mean(),
                                real_rate.mean(), *fake_rate, *real_rate,
                                *FDL, *losses_counter).encode())
        #varation logs
        variation.logs(path_logs, n_updates, last_iteration=task.is_last())
        if (n_updates % save_freq == 0 and n_updates != 0) or n_updates == 1 or task.is_last():
            #it same
            if task.is_last():
                id_name_update = math.ceil(float(n_updates) / save_freq)
            else:
                id_name_update = math.floor(float(n_updates) / save_freq)
            #if is egan, eval only the best one.
            if MULTI_OBJECTIVE_SELECTION == True:
                instances_to_eval = instances
            else:
                # after the sort above the last element is the fittest
                instances_to_eval = [instances[-1]]
            #metric
            metric_results = task.compute_metrics(
                instances_to_eval,
                lambda inst, nz: generator_trainer.set(inst.params).gen(nz),
                samples)
            #mmd2 output (lower is better)
            print(n_updates, "metric:", np.min(metric_results), "id:",
                  np.argmin(metric_results))
            #best
            best = np.argmin(metric_results)
            worst = np.argmax(metric_results)
            np.savez(
                os.path.join(path_models, 'dis_%s.npz') % (id_name_update),
                *lasagne.layers.get_all_param_values(discriminator))
            np.savez(
                os.path.join(path_models, 'gen_%s.npz') % (id_name_update),
                *instances_to_eval[best].params)
            #save best
            generator_trainer.set(instances_to_eval[best].params)
            xfake_best = generator_trainer.gen(task.noise_batch(samples))
            #worst_debug
            generator_trainer.set(instances_to_eval[worst].params)
            xfake_worst = generator_trainer.gen(task.noise_batch(samples))
            #save images
            task.save_image(xmb, xfake_best, path_images,
                            "best_%s" % (id_name_update))
            task.save_image(xmb, xfake_worst, path_images,
                            "worst_%s" % (id_name_update))
            #print pareto front
            with open(
                    os.path.join(path_front, '%s.tsv') % (id_name_update),
                    'wb') as ffront:
                for idx in range(len(instances_to_eval)):
                    ffront.write((str(instances_to_eval[idx].fq) + "\t" +
                                  str(instances_to_eval[idx].fd) + "\t" +
                                  str(metric_results[idx])).encode())
                    ffront.write("\n".encode())
            #save all last models:
            if task.is_last():
                for key, inst in enumerate(instances_to_eval):
                    np.savez(
                        os.path.join(path_models_last, 'gen_%s.npz') % (key),
                        *inst.params)
'vae_kld_cost'] g_cost_outputs = g_basic_costs # compile function for computing generator costs and updates i_train_func = theano.function([Xg], g_cost_outputs, updates=inf_updates) print "{0:.2f} seconds to compile theano functions".format(time() - t) # make file for recording test progress log_name = "{}/FINE-TUNE.txt".format(result_dir) out_file = open(log_name, 'wb') print("EXPERIMENT: {}".format(desc.upper())) n_check = 0 n_updates = 0 t = time() for epoch in range(1, 200): Xva = shuffle(Xva) # initialize cost arrays g_epoch_costs = [0. for gco in g_cost_outputs] g_batch_count = 0. if (epoch < 25): lrt.set_value(floatX(0.00001)) elif (epoch < 50): lrt.set_value(floatX(0.00003)) for imb in tqdm(iter_data(Xva, size=100), total=(ntrain / 100)): # transform training batch to "image format" imb_img = train_transform(imb) # train vae on training batch g_result = i_train_func(floatX(imb_img)) g_epoch_costs = [(v1 + v2) for v1, v2 in zip(g_result, g_epoch_costs)] g_batch_count += 1 if (epoch == 75) or (epoch == 150):
def main(
        problem,
        popsize,
        moegan,
        freq,
        loss_type=['trickLogD', 'minimax', 'ls'],
        postfix=None,
        nPassD=1,  # backpropagation passes for the discriminator per G update
        inBatchSize=64):
    """Train E-GAN / MOE-GAN on a 2-D toy mixture-of-Gaussians problem.

    problem is "8G" (8 gaussians) or "25G" (25 gaussians); popsize is the
    number of surviving candidate generators; moegan toggles NSGA-II
    multi-objective selection; freq controls both checkpointing and the
    MMD2u evaluation interval. Logs go to logs/, fronts to front/, model
    snapshots to models/ and sample plots to the generate_image() output.

    Fix vs. original: at checkpoint time the generator parameters are now
    saved to gen_*.npz and the discriminator parameters to dis_*.npz
    (the original code had the two filenames swapped, unlike the sibling
    image-GAN script which saves them correctly).
    """
    # Parameters
    task = 'toy'
    name = '{}_{}_{}MMDu2'.format(
        problem, "MOEGAN" if moegan else "EGAN",
        postfix + "_" if postfix is not None else "")  #'8G_MOEGAN_PFq_NFd_t2'
    DIM = 512
    begin_save = 0
    nloss = len(loss_type)
    batchSize = inBatchSize
    if problem == "8G":
        DATASET = '8gaussians'
    elif problem == "25G":
        DATASET = '25gaussians'
    else:
        exit(-1)
    ncandi = popsize
    kD = nPassD  # # of discrim updates for each gen update
    kG = 1  # # of generator updates per variation step
    ntf = 256  # samples drawn per candidate for scoring
    b1 = 0.5  # momentum term of adam
    nz = 2  # # of dim for Z
    niter = 4  # # of iter at starting learning rate
    lr = 0.0001  # initial learning rate for adam G
    lrd = 0.0001  # initial learning rate for adam D
    N_up = 100000  # total number of update iterations
    save_freq = freq
    show_freq = freq
    test_deterministic = True
    beta = 1.  # scale of the diversity (Fd) score
    GP_norm = False  # if use gradients penalty on discriminator
    LAMBDA = 2.  # hyperparameter sudof GP
    NSGA2 = moegan
    # Load the dataset
    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM, GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()
    # Gradients penalty norm (WGAN-GP style, on interpolated samples)
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(), wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)
        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.
    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(
        D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))
    # Fd Socre: log squared-gradient-norm of D's loss, the diversity objective
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs], discriminator_loss, updates=updates_d)
    # Compile another function generating some data
    dis_fn = theano.function([real_imgs, fake_imgs], [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs], [
        real_out.mean(),
        fake_out.mean(), (real_out > 0.5).mean(), (fake_out > 0.5).mean(),
        Fd_score
    ])
    #main MODEL G
    noise = T.matrix('noise')
    generator_trainer = create_G(noise=noise, discriminator=discriminator, lr=lr, b1=b1, DIM=DIM)
    # Finally, launch the training loop.
    print("Starting training...")
    desc = task + '_' + name
    print(desc)
    if not os.path.isdir('front'):
        os.mkdir(os.path.join('front'))
    if not os.path.isdir('front/' + desc):
        os.mkdir(os.path.join('front/', desc))
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson' % desc, 'wb')
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/' + desc):
        os.mkdir(os.path.join('models/', desc))
    instances = []

    class Instance:
        """One candidate generator: scores, parameters and its sample batch."""

        def __init__(self, fq, fd, params, img_values):
            self.fq = fq  # quality score: mean D(G(z))
            self.fd = fd  # diversity score (Fd)
            self.params = params
            self.img = img_values

        def f(self):
            # single-objective E-GAN fitness
            return self.fq - self.fd

    # We iterate over epochs:
    for n_updates in range(N_up):
        xmb = toy_dataset(DATASET=DATASET, size=batchSize * kD)
        xmb = xmb[0:batchSize * kD]
        # initial G cluster
        if n_updates == 0:
            # bootstrap the population with freshly created generators
            for can_i in range(0, ncandi):
                init_generator_trainer = create_G(noise=noise,
                                                  discriminator=discriminator,
                                                  lr=lr, b1=b1, DIM=DIM)
                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                cost = init_generator_trainer.train(loss_type[can_i % nloss], zmb)
                sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                gen_imgs = init_generator_trainer.gen(sample_zmb)
                frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                instances.append(
                    Instance(
                        frr_score, fd_score,
                        lasagne.layers.get_all_param_values(
                            init_generator_trainer.generator), gen_imgs))
        else:
            instances_old = instances
            instances = []
            for can_i in range(0, ncandi):
                for type_i in range(0, nloss):
                    # every parent spawns one offspring per loss type
                    generator_trainer.set(instances_old[can_i].params)
                    #train
                    zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                    generator_trainer.train(loss_type[type_i], zmb)
                    #score
                    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                    gen_imgs = generator_trainer.gen(sample_zmb)
                    frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                    #save
                    instances.append(
                        Instance(frr_score, fd_score, generator_trainer.get(),
                                 gen_imgs))
                    if ncandi <= (len(instances) + len(instances_old)):
                        if NSGA2 == True:
                            #add parents in the pool (rescored against current D)
                            for inst in instances_old:
                                generator_trainer.set(inst.params)
                                sample_zmb = floatX(
                                    np_rng.uniform(-1., 1., size=(ntf, nz)))
                                gen_imgs = generator_trainer.gen(sample_zmb)
                                frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                                instances.append(
                                    Instance(frr_score, fd_score,
                                             generator_trainer.get(), gen_imgs))
                            #cromos = { idx:[float(inst.fq),-0.5*float(inst.fd)] for idx,inst in enumerate(instances) } # S1
                            cromos = {
                                idx: [-float(inst.fq), 0.5 * float(inst.fd)]
                                for idx, inst in enumerate(instances)
                            }  # S2
                            cromos_idxs = [idx for idx, _ in enumerate(instances)]
                            finalpop = nsga_2_pass(ncandi, cromos, cromos_idxs)
                            instances = [instances[p] for p in finalpop]
                            with open('front/%s.tsv' % desc, 'wb') as ffront:
                                for inst in instances:
                                    ffront.write(
                                        (str(inst.fq) + "\t" + str(inst.fd)).encode())
                                    ffront.write("\n".encode())
                        elif nloss > 1:
                            #sort new
                            instances.sort(
                                key=lambda inst: -inst.f())  #wrong def in the paper
                            #cut best ones
                            instances = instances[len(instances) - ncandi:]
        # per-candidate statistics against fresh real samples
        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf)
        sample_xmb = sample_xmb[0:ncandi * ntf]
        for i in range(0, ncandi):
            xfake = instances[i].img[0:ntf, :]
            xreal = sample_xmb[i * ntf:(i + 1) * ntf, :]
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            fake_rate = np.array([fr]) if i == 0 else np.append(fake_rate, fr)
            real_rate = np.array([tr]) if i == 0 else np.append(real_rate, tr)
            fake_rate_p = np.array([frp]) if i == 0 else np.append(
                fake_rate_p, frp)
            real_rate_p = np.array([trp]) if i == 0 else np.append(
                real_rate_p, trp)
            FDL = np.array([fdscore]) if i == 0 else np.append(FDL, fdscore)
        print(fake_rate, fake_rate_p, FDL)
        print(n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write((str(fake_rate) + ' ' + str(fake_rate_p) + '\n' +
                     str(n_updates) + ' ' + str(real_rate.mean()) + ' ' +
                     str(real_rate_p.mean()) + '\n').encode())
        f_log.flush()
        # train D
        #for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
        #    cost = train_d(xreal, xfake)
        imgs_fakes = instances[0].img[0:int(batchSize / ncandi * kD), :]
        for i in range(1, len(instances)):
            img = instances[i].img[0:int(batchSize / ncandi * kD), :]
            imgs_fakes = np.append(imgs_fakes, img, axis=0)
        for xreal, xfake in iter_data(xmb, shuffle(imgs_fakes), size=batchSize):
            cost = train_d(xreal, xfake)
        if (n_updates % show_freq == 0 and n_updates != 0) or n_updates == 1:
            id_update = int(n_updates / save_freq)
            #metric
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            xmb = toy_dataset(DATASET=DATASET, size=512)
            #compue mmd for all points
            mmd2_all = []
            for i in range(0, ncandi):
                generator_trainer.set(instances[i].params)
                g_imgs = generator_trainer.gen(s_zmb)
                mmd2_all.append(abs(compute_metric_mmd2(g_imgs, xmb)))
            mmd2_all = np.array(mmd2_all)
            #print pareto front
            if NSGA2 == True:
                front_path = os.path.join('front/', desc)
                with open('%s/%d_%s_mmd2u.tsv' % (front_path, id_update, desc),
                          'wb') as ffront:
                    for idx in range(0, ncandi):
                        ffront.write((str(instances[idx].fq) + "\t" +
                                      str(instances[idx].fd) + "\t" +
                                      str(mmd2_all[idx])).encode())
                        ffront.write("\n".encode())
            #mmd2 output (lower is better)
            print(n_updates, "mmd2u:", np.min(mmd2_all), "id:",
                  np.argmin(mmd2_all))
            #save best
            params = instances[np.argmin(mmd2_all)].params
            generator_trainer.set(params)
            g_imgs_min = generator_trainer.gen(s_zmb)
            generate_image(xmb, g_imgs_min, id_update, desc, postfix="_mmu2d_best")
            # BUGFIX: gen_*.npz now stores the (best) generator parameters and
            # dis_*.npz the discriminator parameters; the original code saved
            # them under each other's filenames.
            np.savez('models/%s/gen_%d.npz' % (desc, id_update),
                     *generator_trainer.get())
            np.savez('models/%s/dis_%d.npz' % (desc, id_update),
                     *lasagne.layers.get_all_param_values(discriminator))
            #worst_debug
            params = instances[np.argmax(mmd2_all)].params
            generator_trainer.set(params)
            g_imgs_max = generator_trainer.gen(s_zmb)
            generate_image(xmb, g_imgs_max, id_update, desc, postfix="_mmu2d_worst")
print 'COMPILING' t = time() _gen = theano.function([Z], gX) _train_d = theano.function([X, X0], d_cost, updates=d_updates) _train_g = theano.function([Z, deltaX], g_cost, updates=g_updates) _vgd_gradient = theano.function([X0, X1], vgd_gradient(X0, X1)) _reconstruction_cost = theano.function([X], T.mean(mse_data)) print '%.2f seconds to compile theano functions' % (time() - t) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(nvis, nz))) n_updates = 0 t = time() for epoch in range(1, niter + 1): for filename in npzfiles: batch_data = shuffle( np.load(filename)['images'].astype(theano.config.floatX)) for idx in tqdm(xrange(0, batch_data.shape[0] // nbatch)): imb = transform(batch_data[idx * nbatch:(idx + 1) * nbatch]) zmb = floatX(np_rng.uniform(-1., 1., size=(imb.shape[0], nz))) # generate samples samples = _gen(zmb) vgd_grad = _vgd_gradient(samples, samples) if n_updates % (k + 1) == 0: _train_g(zmb, floatX(vgd_grad)) else: _train_d(imb, samples) n_updates += 1
def reset_data(self):
    """Re-shuffle the held samples and labels in unison and rewind the cursor."""
    # One shared permutation keeps each label aligned with its sample.
    permuted_data, permuted_labels = shuffle(self.data, self.labels)
    self.data = permuted_data
    self.labels = permuted_labels
    # Restart iteration from the beginning of the freshly shuffled data.
    self.index = 0
iwae_cost_func = theano.function([Xg], [log_p_x, log_p_z, log_q_z]) g_eval_func = theano.function([Xg], g_basic_costs) print "{0:.2f} seconds to compile theano functions".format(time()-t) # make file for recording test progress log_name = "{}/EVAL.txt".format(result_dir) out_file = open(log_name, 'wb') print("EXPERIMENT: {}".format(desc.upper())) Xva_blocks = [Xva] #np.split(Xva, 2, axis=0) for epoch in range(5): epoch_vae_cost = 0.0 epoch_iwae_cost = 0.0 for block_num, Xva_block in enumerate(Xva_blocks): Xva_block = shuffle(Xva_block) obs_count = Xva_block.shape[0] g_epoch_costs = [0. for c in g_basic_costs] g_batch_count = 0. for imb in tqdm(iter_data(Xva_block, size=nbatch), total=obs_count/nbatch): # transform validation batch to "image format" imb_img = floatX( train_transform(imb) ) # evaluate costs g_result = g_eval_func(imb_img) # evaluate costs more thoroughly iwae_bounds = iwae_multi_eval(imb_img, 25*25, cost_func=iwae_cost_func, iwae_num=iwae_samples) g_result[4] = np.mean(iwae_bounds) # swap in tighter bound # accumulate costs g_epoch_costs = [(v1 + v2) for v1, v2 in zip(g_result, g_epoch_costs)]
def main():
    """Train E-GAN on 128x128 CelebA faces (Python 2 / Theano / Lasagne).

    Loads pre-aligned face crops from an HDF5 file, builds one shared
    discriminator and a population of `ncandi` generators evolved with
    three loss mutations ('trickLogD', 'minimax', 'ls'), selecting
    survivors by fitness = fake_rate - diversity score. Samples and model
    snapshots are written under samples/<desc> and models/<desc>.
    """
    # Parameters
    data_path = '../datasets/'
    task = 'face'
    name = '128'
    start = 0
    stop = 202560
    input_nc = 3  # number of image channels
    loss_type = ['trickLogD','minimax','ls']
    nloss = 3
    shuffle_ = True
    batchSize = 32
    fineSize = 128
    flip = True
    ncandi = 1  # # of survived childern
    kD = 3  # # of discrim updates for each gen update
    kG = 1  # # of discrim updates for each gen update
    ntf = batchSize*kD
    b1 = 0.5  # momentum term of adam
    nz = 100  # # of dim for Z
    ngf = 64  # # of gen filters in first conv layer
    ndf = 64  # # of discrim filters in first conv layer
    niter = 25  # # of iter at starting learning rate
    lr = 0.0002  # initial learning rate for adam G
    lrd = 0.0002  # initial learning rate for adam D
    beta = 0.001  # the hyperparameter that balance fitness score
    GP_norm = False  # if use gradients penalty on discriminator
    LAMBDA = 2.  # hyperparameter of GP
    save_freq = 5000
    show_freq = 500
    begin_save = 0
    test_deterministic = True
    # Load the dataset
    print("Loading data...")
    f = h5py.File(data_path+'img_align_celeba_128.hdf5','r')
    trX = f['data']  # HDF5 dataset; indexed lazily with sorted index lists below
    ids = range(start, stop)
    ################## MODEL D #######################
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.tensor4('real_imgs')
    fake_imgs = T.tensor4('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_128(ndf=ndf)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (lasagne.objectives.binary_crossentropy(real_out, 1)
                          + lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()
    # Gradients penalty norm (WGAN-GP style, on interpolated samples)
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize,1,1,1), low=0.,high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha*differences)
        gradients = theano.grad(lasagne.layers.get_output(discriminator, interpolates).sum(), wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1,2,3)))
        gradient_penalty = T.mean((slopes-1.)**2)
        D_loss = discriminator_loss + LAMBDA*gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = b1
    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(
        D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))
    # Diversity fitnees: log squared-gradient-norm of D's loss w.r.t. its params
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta*T.log(sum(T.sum(T.sqr(x)) for x in Fd))
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs],
                              discriminator_loss,
                              updates=updates_d)
    # Compile another function generating some data
    disft_fn = theano.function([real_imgs,fake_imgs],
                               [(real_out).mean(), (fake_out).mean(), Fd_score])
    # Finally, launch the training loop.
    print("Starting training...")
    desc = task + '_' + name
    print desc
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson'%desc, 'wb')
    if not os.path.isdir('samples'):
        os.mkdir(os.path.join('samples/'))
    if not os.path.isdir('samples/'+desc):
        os.mkdir(os.path.join('samples/',desc))
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/'+desc):
        os.mkdir(os.path.join('models/',desc))
    gen_new_params = []
    n_updates = 0
    # We iterate over epochs:
    for epoch in range(niter):
        t = time()
        if shuffle_ is True:
            ids = shuffle(ids)
        for index_ in iter_data(ids, size=batchSize*kD):
            # h5py requires sorted (increasing) fancy indices
            index = sorted(index_)
            xmb = trX[index,:,:,:]
            xmb = Batch(xmb,fineSize,input_nc,flip=flip)
            xmb = processing_img(xmb, center=True, scale=True, convert=False)
            # independent random real batch used only for scoring candidates
            rand_idx = random.randint(start,stop-ntf-1)
            rand_ids = ids[rand_idx:rand_idx+ntf]
            rand_ids = sorted(rand_ids)
            sample_xmb = trX[rand_ids,:,:,:]
            sample_xmb = Batch(sample_xmb,fineSize,input_nc,flip=flip)
            sample_xmb = processing_img(sample_xmb, center=True, scale=True, convert=False)
            # initial G cluster
            if epoch + n_updates == 0:
                # bootstrap: build and briefly train ncandi fresh generators
                for can_i in range(0,ncandi):
                    train_g, gen_fn, generator = create_G(
                        loss_type=loss_type[can_i%nloss],
                        discriminator=discriminator, lr=lr, b1=b1, ngf=ngf)
                    for _ in range(0,kG):
                        zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                        cost = train_g(zmb)
                    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                    gen_imgs = gen_fn(sample_zmb)
                    gen_new_params.append(lasagne.layers.get_all_param_values(generator))
                    if can_i == 0:
                        g_imgs_old=gen_imgs
                        # Py2 integer division: batchSize/ncandi*kD is an int slice bound
                        fmb = gen_imgs[0:batchSize/ncandi*kD,:,:,:]
                    else:
                        g_imgs_old = np.append(g_imgs_old,gen_imgs,axis=0)
                        fmb = np.append(fmb,gen_imgs[0:batchSize/ncandi*kD,:,:,:],axis=0)
                #print gen_new_params
                # MODEL G: one shared generator graph reused for every candidate;
                # candidate weights are swapped in via set_all_param_values below
                noise = T.matrix('noise')
                generator = models_uncond.build_generator_128(noise,ngf=ngf)
                Tgimgs = lasagne.layers.get_output(generator)
                Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)
                # the three E-GAN mutation objectives
                g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
                g_loss_minimax = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
                g_loss_ls = T.mean(T.sqr((Tfake_out - 1)))
                g_params = lasagne.layers.get_all_params(generator, trainable=True)
                up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1)
                up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1)
                up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1)
                train_g = theano.function([noise],g_loss_logD,updates=up_g_logD)
                train_g_minimax = theano.function([noise],g_loss_minimax,updates=up_g_minimax)
                train_g_ls = theano.function([noise],g_loss_ls,updates=up_g_ls)
                gen_fn = theano.function([noise], lasagne.layers.get_output(
                    generator,deterministic=True))
            else:
                gen_old_params = gen_new_params
                for can_i in range(0,ncandi):
                    for type_i in range(0,nloss):
                        # load parent weights, mutate with loss type_i
                        lasagne.layers.set_all_param_values(generator, gen_old_params[can_i])
                        if loss_type[type_i] == 'trickLogD':
                            for _ in range(0,kG):
                                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                                cost = train_g(zmb)
                        elif loss_type[type_i] == 'minimax':
                            for _ in range(0,kG):
                                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                                cost = train_g_minimax(zmb)
                        elif loss_type[type_i] == 'ls':
                            for _ in range(0,kG):
                                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                                cost = train_g_ls(zmb)
                        sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                        gen_imgs = gen_fn(sample_zmb)
                        _, fr_score, fd_score = disft_fn(sample_xmb,gen_imgs)
                        # fitness: quality minus diversity score
                        fit = fr_score - fd_score
                        if can_i*nloss + type_i < ncandi:
                            # population not yet full: take the offspring as-is
                            idx = can_i*nloss + type_i
                            gen_new_params[idx]=lasagne.layers.get_all_param_values(generator)
                            fitness[idx]=fit
                            fake_rate[idx]=fr_score
                            g_imgs_old[idx*ntf:(idx+1)*ntf,:,:,:]=gen_imgs
                            fmb[idx*batchSize/ncandi*kD:(idx+1)*batchSize/ncandi*kD,:,:,:] = \
                                gen_imgs[0:batchSize/ncandi*kD,:,:,:]
                        else:
                            # replace the currently worst candidate if this one is fitter
                            fit_com = fitness - fit
                            if min(fit_com) < 0:
                                ids_replace = np.where(fit_com==min(fit_com))
                                idr = ids_replace[0][0]
                                fitness[idr]=fit
                                fake_rate[idr]=fr_score
                                gen_new_params[idr] = lasagne.layers.get_all_param_values(generator)
                                g_imgs_old[idr*ntf:(idr+1)*ntf,:,:,:]=gen_imgs
                                fmb[idr*batchSize/ncandi*kD:(idr+1)*batchSize/ncandi*kD,:,:,:] = \
                                    gen_imgs[0:batchSize/ncandi*kD,:,:,:]
                print fake_rate, fitness
                f_log.write(str(fake_rate) + ' '+str(fd_score) +' ' + str(fitness)+ '\n')
            # train D
            for xreal,xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
                cost = train_d(xreal, xfake)
            # per-candidate statistics; also (re)initializes fake_rate/fitness
            # arrays consumed by the replacement logic in the next iteration
            for i in range(0, ncandi):
                xfake = g_imgs_old[i*ntf:(i+1)*ntf,:,:,:]
                xreal = sample_xmb[0:ntf,:,:,:]
                tr, fr, fd = disft_fn(xreal,xfake)
                if i == 0:
                    fake_rate = np.array([fr])
                    fitness = np.array([0.])
                    real_rate = np.array([tr])
                    FDL = np.array([fd])
                else:
                    fake_rate = np.append(fake_rate,fr)
                    fitness = np.append(fitness,[0.])
                    real_rate = np.append(real_rate,tr)
                    FDL = np.append(FDL,fd)
            print fake_rate, FDL
            print (n_updates, epoch,real_rate.mean())
            n_updates += 1
            f_log.write(str(fake_rate)+' '+str(FDL)+ '\n'+
                        str(epoch)+' '+str(n_updates)+' '+str(real_rate.mean())+'\n')
            f_log.flush()
            if n_updates%show_freq == 0:
                # tile an 8x8 grid of current samples with 1px separators
                blank_image = Image.new("RGB",(fineSize*8+9,fineSize*8+9))
                for i in range(8):
                    for ii in range(8):
                        img = g_imgs_old[i*8+ii,:,:,:]
                        img = ImgRescale(img, center=True, scale=True, convert_back=True)
                        blank_image.paste(Image.fromarray(img),
                                          (ii*fineSize+ii+1,i*fineSize+i+1))
                blank_image.save('samples/%s/%s_%d.png'%(desc,desc,n_updates/save_freq))
            if n_updates%save_freq == 0 and epoch > begin_save - 1:
                # Optionally, you could now dump the network weights to a file like this:
                np.savez('models/%s/gen_%d.npz'%(desc,n_updates/save_freq),
                         *lasagne.layers.get_all_param_values(generator))
                np.savez('models/%s/dis_%d.npz'%(desc,n_updates/save_freq),
                         *lasagne.layers.get_all_param_values(discriminator))
'n_examples', 'n_seconds', 'g_cost', 'd_cost', ] print desc.upper() n_updates = 0 n_check = 0 n_epochs = 0 n_updates = 0 n_examples = 0 t = time() sample_z0mb = rand_gen(size=(200, nz0)) # noise samples for top generator module for epoch in range(1, niter+niter_decay+1): trX = shuffle(trX) for imb in tqdm(iter_data(trX, size=nbatch), total=ntrain/nbatch): imb = transform(imb) z0mb = rand_gen(size=(len(imb), nz0)) if n_updates % (k+1) == 0: cost = _train_g(imb, z0mb) else: cost = _train_d(imb, z0mb) n_updates += 1 n_examples += len(imb) samples = np.asarray(_gen(sample_z0mb)) grayscale_grid_vis(inverse_transform(samples), (10, 20), "{}/{}.png".format(sample_dir, n_epochs)) n_epochs += 1 if n_epochs > niter: lrt.set_value(floatX(lrt.get_value() - lr/niter_decay)) if n_epochs in [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200]:
# Excerpt of a VAE-style compile-and-train section (Python 2 / Theano).
# Indentation reconstructed from syntax; the surrounding scope (X, decost,
# gupdates, func_z, etc.) is defined outside this excerpt.
print 'COMPILING'
t = time()
# Compile the training step and the encoder/decoder/reconstruction helpers.
_train = theano.function([X, num_z], decost, updates=gupdates)
_reconstruct = theano.function([X], func_res_x)
_encoder = theano.function([X], func_z)
_decoder = theano.function([func_z], func_res_x)
print '%.2f seconds to compile theano functions'%(time()-t)
n_updates = 0
t = time()
# Fixed evaluation batches: 100 prior samples and 100 shuffled test images.
zmb = floatX(np_rng.normal(0, 1, size=(100, nz)))
xmb = floatX(shuffle(X_test)[:100])
number_z = 5  # number of z samples per data point passed to _train — presumably for a multi-sample bound; confirm against decost's definition
for epoch in range(1, niter+niter_decay+1):
    X_train = shuffle(X_train)
    logpxz = 0
    # NOTE(review): `ntrain/nbatch` relies on Python 2 integer division for tqdm's total.
    for imb in tqdm(iter_data(X_train, size=nbatch), total=ntrain/nbatch):
        imb = floatX(imb)
        # Accumulate the per-example objective, weighted by minibatch size.
        logpxz += _train(imb, number_z) * len(imb)
        n_updates+=1
    print epoch, 'logpxz', logpxz / ntrain
# Excerpt of a VAE/GAN experiment's compile + per-epoch setup (Python 2 / Theano).
# Indentation reconstructed from syntax; the fragment ends mid-epoch-loop
# (the batch loop that consumes these counters is cut off).
g_train_func = theano.function([x_in], g_cost_outputs, updates=all_updates)
g_eval_func = theano.function([x_in], g_cost_outputs)  # same outputs, no parameter updates — evaluation-only twin of g_train_func
print "{0:.2f} seconds to compile theano functions".format(time() - t)
# make file for recording test progress
log_name = "{}/RESULTS.txt".format(result_dir)
out_file = open(log_name, 'wb')  # NOTE(review): binary mode for a text log — fine on Python 2, breaks str writes on Python 3; also never explicitly closed in this excerpt
print("EXPERIMENT: {}".format(desc.upper()))
n_check = 0
n_updates = 0
t = time()
kld_weights = np.linspace(0.0, 1.0, 10)  # KLd warm-up schedule; currently unused (see commented block below)
for epoch in range(1, (niter + niter_decay + 1)):
    Xtr = shuffle(Xtr)
    Xva = shuffle(Xva)
    # mess with the KLd cost
    # if ((epoch-1) < len(kld_weights)):
    #     lam_kld.set_value(floatX([kld_weights[epoch-1]]))
    lam_kld.set_value(floatX([1.0]))  # KLd weight pinned at 1.0; the warm-up above is disabled
    # initialize cost arrays
    g_epoch_costs = [0. for i in range(5)]
    v_epoch_costs = [0. for i in range(5)]
    i_epoch_costs = [0. for i in range(5)]
    epoch_layer_klds = [0. for i in range(len(vae_layer_names))]
    vae_nlls = []
    vae_klds = []
    # Batch counters are floats — presumably used as division denominators for epoch means; confirm in the truncated loop body.
    g_batch_count = 0.
    i_batch_count = 0.
    v_batch_count = 0.
# Excerpt of a conditional GAN training loop (Python 2 / Theano), with periodic
# nearest-neighbour-distance evaluation on generated samples. Indentation
# reconstructed from syntax; the fragment begins inside the tail of a list
# literal and ends mid-evaluation-block.
    "1k_va_nnd", "10k_va_nnd", "100k_va_nnd", "g_cost", "d_cost", ]  # tail of a log-column-names list; header truncated
print desc.upper()
n_updates = 0
n_check = 0
n_epochs = 0
n_updates = 0  # NOTE(review): duplicate initialization of n_updates — harmless but redundant
n_examples = 0
t = time()
for epoch in range(1, niter + niter_decay + 1):
    trX, trY = shuffle(trX, trY)
    # NOTE(review): `ntrain / nbatch` relies on Python 2 integer division for tqdm's total.
    for imb, ymb in tqdm(iter_data(trX, trY, size=nbatch), total=ntrain / nbatch):
        imb = transform(imb)
        ymb = floatX(OneHot(ymb, ny))  # labels as one-hot conditioning vectors
        zmb = floatX(np_rng.uniform(-1.0, 1.0, size=(len(imb), nz)))
        # Alternate: 1 generator update per k discriminator updates.
        if n_updates % (k + 1) == 0:
            cost = _train_g(imb, zmb, ymb)
        else:
            cost = _train_d(imb, zmb, ymb)
        n_updates += 1
        n_examples += len(imb)
    # Every 5th epoch: record costs and draw 100k samples for evaluation.
    if (epoch - 1) % 5 == 0:
        g_cost = float(cost[0])
        d_cost = float(cost[1])
        gX, gY = gen_samples(100000)
        gX = gX.reshape(len(gX), -1)  # flatten images to vectors; the NND computation that follows is cut off in this excerpt
# Excerpt of an unconditional GAN training loop that accumulates per-epoch
# generator/discriminator costs (Python 2 / Theano). Indentation reconstructed
# from syntax; the fragment begins inside the tail of a list literal and ends
# mid-epoch-loop (the code that consumes g_cost/d_cost is cut off).
    'n_examples', 'n_seconds', 'g_cost', 'd_cost', ]  # tail of a log-column-names list; header truncated
print desc.upper()
n_updates = 0
n_check = 0
n_epochs = 0
n_updates = 0  # NOTE(review): duplicate initialization of n_updates — harmless but redundant
n_examples = 0
t = time()
sample_z0mb = rand_gen(size=(200, nz0)) # noise samples for top generator module
for epoch in range(1, niter+niter_decay+1):
    Xtr = shuffle(Xtr)
    # Per-epoch cost accumulators and update counters for each network.
    g_cost = 0
    d_cost = 0
    gc_iter = 0
    dc_iter = 0
    # NOTE(review): `ntrain/nbatch` relies on Python 2 integer division for tqdm's total.
    for imb in tqdm(iter_data(Xtr, size=nbatch), total=ntrain/nbatch):
        imb = train_transform(imb)
        z0mb = rand_gen(size=(len(imb), nz0))
        # Alternate: 1 generator update per k discriminator updates; each
        # train function returns multiple costs — index [0]/[1] selects the one tracked.
        if n_updates % (k+1) == 0:
            g_cost += _train_g(imb, z0mb)[0]
            gc_iter += 1
        else:
            d_cost += _train_d(imb, z0mb)[1]
            dc_iter += 1
        n_updates += 1
        n_examples += len(imb)
# Excerpt of an SVGD-style conditional GAN training loop on CIFAR-10
# (Python 2 / Theano). Indentation reconstructed from syntax; the fragment
# ends mid-batch-loop (no n_updates increment is visible — presumably it
# follows in the truncated portion; confirm against the full file).
_train_g = theano.function([Z, Y, deltaX], g_cost, updates=g_updates)  # generator step driven by an externally supplied gradient (deltaX)
_vgd_gradient = theano.function([X0, X1, Y], vgd_gradient(X0, X1, Y))
_reconstruction_cost = theano.function([X], T.mean(mse_data))
print '%.2f seconds to compile theano functions' % (time() - t)
# Fixed visualization batch: 200 noise vectors with labels 0..9, 20 samples each.
sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))
sample_ymb = floatX(
    OneHot(
        np.asarray([[i for _ in range(20)] for i in range(10)]).flatten(),
        ny))
n_updates = 0
t = time()
for epoch in range(niter):
    print 'cifar 10, vgd, %s, iter %d' % (desc, epoch)
    trX, trY = shuffle(trX, trY)
    # NOTE(review): `ntrain / nbatch` relies on Python 2 integer division for tqdm's total.
    for imb, ymb in tqdm(iter_data(trX, trY, size=nbatch), total=ntrain / nbatch):
        imb = transform(imb.reshape(imb.shape[0], nc, npx, npx))
        ymb = floatX(OneHot(ymb, ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(imb.shape[0], nz)))
        # generate samples
        samples = _gen(zmb, ymb)
        # SVGD gradient of the samples w.r.t. themselves (both arguments are `samples`).
        vgd_grad = _vgd_gradient(samples, samples, ymb)
        # Alternate: 1 generator update per k discriminator updates.
        if n_updates % (k + 1) == 0:
            _train_g(zmb, ymb, floatX(vgd_grad))
        else:
            _train_d(imb, samples, ymb)