def train(cnn, epochs=80, learn_rate=0.001, batch_size=100, gpu=True):
    """Train a regression (MSE) colourization CNN.

    Note that you do not need this function. Included for reference.

    Fixes vs. the original: removed leftover debugging prints and a stray
    blocking ``input()`` call that halted training, and replaced the
    deprecated ``loss.data[0]`` indexing with ``.data.item()`` (consistent
    with the classification trainer elsewhere in this file).

    Args:
        cnn: model to train; moved onto the GPU in place when ``gpu`` is True.
        epochs: number of passes over the training set.
        learn_rate: Adam learning rate.
        batch_size: mini-batch size for both training and evaluation.
        gpu: run on CUDA when True.
    """
    if gpu:
        cnn.cuda()
    # Set up L2 loss
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(cnn.parameters(), lr=learn_rate)

    # Loading & transforming data
    (x_train, y_train), (x_test, y_test) = load_cifar10()
    train_rgb, train_grey = process(x_train, y_train)
    test_rgb, test_grey = process(x_test, y_test)

    print("Beginning training ...")
    for epoch in range(epochs):
        # Train the Model
        cnn.train()  # Change model to 'train' mode
        for i, (xs, ys) in enumerate(get_batch(train_grey, train_rgb, batch_size)):
            images, labels = get_torch_vars(xs, ys, gpu)
            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = cnn(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # Reports the loss of the last mini-batch of the epoch only.
        print('Epoch [%d/%d], Loss: %.4f' % (epoch + 1, epochs, loss.data.item()))

        # Evaluate the model
        cnn.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
        losses = []
        for i, (xs, ys) in enumerate(get_batch(test_grey, test_rgb, batch_size)):
            images, labels = get_torch_vars(xs, ys, gpu)
            outputs = cnn(images)
            val_loss = criterion(outputs, labels)
            losses.append(val_loss.data.item())
        val_loss = np.mean(losses)
        print('Epoch [%d/%d], Val Loss: %.4f' % (epoch + 1, epochs, val_loss))

    # Save the Trained Model.
    # NOTE(review): relies on a module-level `args` for kernel/num_filters —
    # confirm `args` is defined globally wherever this reference code is run.
    torch.save(cnn.state_dict(), 'regression_cnn_k%d_f%d.pkl' % (
        args.kernel, args.num_filters))
def plot_activation(args, cnn, reg=True):
    """Save a model's colourization output and intermediate conv activations.

    For the single test image selected by ``args.index``, writes to
    ``outputs/<experiment_name>/act<index>/``: the predicted colourization,
    the greyscale input, the ground-truth RGB image, and a tiled image of
    each of ``cnn.out1``..``cnn.out5`` activation maps.

    Cleanups vs. the original: removed the dead ``img = predcolor`` store and
    the unused ``num_colours``; renamed locals ``id``/``filter`` that
    shadowed builtins.

    Args:
        args: namespace with colours, index, experiment_name, gpu,
            downsize_input attributes.
        cnn: trained model (moved to CPU here before the forward pass).
        reg: passed through to get_torch_vars.
    """
    # LOAD THE COLOURS CATEGORIES
    colours = np.load(args.colours)[0]

    (x_train, y_train), (x_test, y_test) = load_cifar10()
    test_rgb, test_grey = process_cls(x_test, y_test,
                                      downsize_input=args.downsize_input)
    test_rgb_cat = get_rgb_cat(test_rgb, colours)

    # Index of the test image to visualize.
    img_id = args.index
    outdir = "outputs/" + args.experiment_name + '/act' + str(img_id)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    images, labels = get_torch_vars(np.expand_dims(test_grey[img_id], 0),
                                    np.expand_dims(test_rgb_cat[img_id], 0),
                                    args.gpu, reg)
    # Run the forward pass on CPU so activations can be pulled out below.
    cnn.cpu()
    outputs = cnn(images)
    _, predicted = torch.max(outputs.data, 1, keepdim=True)
    predcolor = get_cat_rgb(predicted.cpu().numpy()[0, 0, :, :], colours)
    toimage(predcolor, cmin=0, cmax=1) \
        .save(os.path.join(outdir, "output_%d.png" % img_id))

    # Save the model input: tile the grey channel to 3 channels so it renders
    # as an RGB image unless the input was already downsized RGB.
    if not args.downsize_input:
        img = np.tile(np.transpose(test_grey[img_id], [1, 2, 0]), [1, 1, 3])
    else:
        img = np.transpose(test_grey[img_id], [1, 2, 0])
    toimage(img, cmin=0, cmax=1) \
        .save(os.path.join(outdir, "input_%d.png" % img_id))

    # Save the ground-truth colour image.
    img = np.transpose(test_rgb[img_id], [1, 2, 0])
    toimage(img, cmin=0, cmax=1) \
        .save(os.path.join(outdir, "input_%d_gt.png" % img_id))

    def add_border(img):
        # 1-pixel white border so tiled activation maps are visually separated.
        return np.pad(img, 1, "constant", constant_values=1.0)

    def draw_activations(path, activation, imgwidth=4):
        # Tile the activation maps `imgwidth` per row into one big image.
        img = np.vstack([
            np.hstack([
                add_border(filt)
                for filt in activation[i * imgwidth:(i + 1) * imgwidth, :, :]
            ])
            for i in range(activation.shape[0] // imgwidth)
        ])
        scipy.misc.imsave(path, img)

    for i, tensor in enumerate(
            [cnn.out1, cnn.out2, cnn.out3, cnn.out4, cnn.out5]):
        draw_activations(
            os.path.join(outdir, "conv%d_out_%d.png" % (i + 1, img_id)),
            tensor.data.cpu().numpy()[0])
    print("visualization results are saved to %s" % outdir)
def train(self, epochs, batch_size=128, sample_interval=50):
    """Adversarial training loop over public/secret CIFAR-10 splits.

    Each epoch samples one random mini-batch from the public split and one
    from the secret split, reconstructs both through the autoencoder
    (``self.ae``), trains the attack model on all four batches
    (raw/reconstructed x public/secret), then trains the combined model to
    reconstruct inputs while making the attack predict "public".
    """
    # NOTE(review): `sample_interval` is never read in this body — snapshots
    # are instead taken every 20 epochs below; confirm intent.
    x_train_public, y_train_public, _, _, \
        x_train_secret, y_train_secret, _, _ = load_data.load_cifar10()
    # Privacy labels fed to the attack model: 1 = secret split, 0 = public.
    label_secret = np.ones(shape=(batch_size, 1))
    label_public = np.zeros(shape=(batch_size, 1))
    for epoch in range(epochs):
        start = time.time()
        print("In the epoch ", epoch, "/", epochs)
        ####### generate pics for public pics #######
        # Random mini-batch (sampled without replacement) of public images.
        idx_public = random.sample(range(0, x_train_public.shape[0]), batch_size)
        image_batch_public = x_train_public[idx_public, :, :, :]
        label_batch_public = y_train_public[idx_public, :]
        generated_images_public = self.ae.predict(image_batch_public)
        ####### generate pics for secret pics #######
        idx_secret = random.sample(range(0, x_train_secret.shape[0]), batch_size)
        image_batch_secret = x_train_secret[idx_secret, :, :, :]
        label_batch_secret = y_train_secret[idx_secret, :]
        generated_images_secret = self.ae.predict(image_batch_secret)
        # Train the attack model on raw and autoencoded images of both splits.
        # Returned loss values are currently unused.
        l1 = self.attack.train_on_batch(image_batch_public,
                                        [label_public, label_batch_public])
        l2 = self.attack.train_on_batch(generated_images_public,
                                        [label_public, label_batch_public])
        l3 = self.attack.train_on_batch(image_batch_secret,
                                        [label_secret, label_batch_secret])
        l4 = self.attack.train_on_batch(generated_images_secret,
                                        [label_secret, label_batch_secret])
        # Generator side: both splits are pushed toward the "public" privacy
        # label while preserving the reconstruction and class targets.
        g_loss1 = self.combined_model.train_on_batch(
            image_batch_public,
            [label_public, image_batch_public, label_batch_public])
        g_loss2 = self.combined_model.train_on_batch(
            image_batch_secret,
            [label_public, image_batch_secret, label_batch_secret])
        print("Epoch ", epoch, "took time", time.time() - start)
        # Periodic checkpoints and sample images (every 20 epochs).
        if epoch % 20 == 0:
            self.save_model(epoch)
            self.sample_images(image_batch_secret[0], epoch, 'secret')
            self.sample_images(image_batch_public[0], epoch, 'public')
""" K-means clustering of colors in RGB space. You do not need this file for this assignment; it is included for completeness to show how the colors categories were generated. """ from __future__ import print_function import numpy as np import scipy.misc import scipy.cluster from load_data import load_cifar10 HORSE_CATEGORY = 7 k = 24 (x_train, y_train), (x_test, y_test) = load_cifar10() MAX_PIXEL = 256.0 x_train = x_train / MAX_PIXEL x_train = x_train[np.where(y_train == HORSE_CATEGORY)[0], :, :, :] train_rgb = np.reshape(x_train, [-1, 3]) result = scipy.cluster.vq.kmeans(train_rgb, k) np.save("colors/color_kmeans%d_horse.npy" % k, result)
# Fix: the `floatX` helper below reads `theano.config.floatX`, but
# `import theano.tensor as T` binds only the name `T`, not `theano`,
# so the bare `theano` name was unbound (NameError). Import it explicitly.
import theano
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt
plt.ion()

import load_data
from theano.tensor.nnet import conv
from theano.tensor.signal import downsample

# MULTIVERSO: import multiverso
import multiverso as mv
# MULTIVERSO: the sharedvar in theano_ext acts same like Theano's
# sharedVariables. But it use multiverso as the backend
from multiverso.theano_ext import sharedvar

x_train, t_train, x_test, t_test = load_data.load_cifar10()
# Integer class labels for the (one-hot) test targets.
labels_test = np.argmax(t_test, axis=1)

# reshape data to NCHW image layout
x_train = x_train.reshape((x_train.shape[0], 3, 32, 32))
x_test = x_test.reshape((x_test.shape[0], 3, 32, 32))

# define symbolic Theano variables
x = T.tensor4()
t = T.matrix()


# define model: neural network
def floatX(x):
    """Cast an array-like to Theano's configured float dtype."""
    return np.asarray(x, dtype=theano.config.floatX)
valid_batch_acc.data.cpu().numpy()), #[0]), 'avgacc:{:.3f}'.format(np.mean(prev_accs_valid))) start = time.time() if __name__ == "__main__": load_ = 0 save_ = 0 save_file = home + '/Documents/tmp/model.pt' #Load data # train_x, train_y, valid_x, valid_y = load_mnist() train_x, train_y, valid_x, valid_y = load_cifar10() train_x = np.reshape(train_x, [train_x.shape[0], 3, 32, 32]) valid_x = np.reshape(valid_x, [valid_x.shape[0], 3, 32, 32]) print(train_x.shape) print(train_y.shape) print(valid_x.shape) print(valid_y.shape) print() #Init model print('Loading model') use_cuda = True # torch.cuda.is_available() n_gpus = 1 #2 #torch.cuda.device_count() if n_gpus < 2: os.environ['CUDA_VISIBLE_DEVICES'] = '0' # '1' #which gpu
import numpy as np import matplotlib.pyplot as plt plt.ion() import load_data from theano.tensor.nnet import conv from theano.tensor.signal import downsample # MULTIVERSO: import multiverso import multiverso as mv # MULTIVERSO: the sharedvar in theano_ext acts same like Theano's # sharedVariables. But it use multiverso as the backend from multiverso.theano_ext import sharedvar x_train, t_train, x_test, t_test = load_data.load_cifar10() labels_test = np.argmax(t_test, axis=1) # reshape data x_train = x_train.reshape((x_train.shape[0], 3, 32, 32)) x_test = x_test.reshape((x_test.shape[0], 3, 32, 32)) # define symbolic Theano variables x = T.tensor4() t = T.matrix() # define model: neural network def floatX(x):
import numpy as np
import matplotlib.pyplot as plt
import random
import imageio
import pickle
import os
from load_data import load_cifar10
from PIL import Image
import time

# Local (Windows) path to the extracted CIFAR-10 python batches.
cifar10_dir = 'D:\\assignment1\\cifar-10-batches-py'
x_train, y_train, x_test, y_test = load_cifar10(cifar10_dir)

# Flatten each 32x32x3 image into a single 3072-dim row vector.
x_train = np.reshape(x_train, (x_train.shape[0], -1))
x_test = np.reshape(x_test, (x_test.shape[0], -1))

#print(mean_image[:10])
#plt.figure(figsize=(4,4))
#plt.imshow(mean_image.reshape((32,32,3)).astype('uint8'))
#plt.show()
#x_train = np.hstack([x_train, np.ones((x_train.shape[0], 1))])
#x_val = np.hstack([x_val, np.ones((x_val.shape[0], 1))])
#x_test = np.hstack([x_test, np.ones((x_test.shape[0], 1))])
#x_dev = np.hstack([x_dev, np.ones((x_dev.shape[0], 1))])

# Problem dimensions: N training samples, 10 classes,
# din = flattened input size (3072), dout = number of output scores.
num_sample = x_train.shape[0]
num_class = 10
din = x_train.shape[1]
dout = 10
if __name__ == "__main__": load_ = 0 save_ = 0 save_file = home+'/Documents/tmp/model.pt' #Load data # train_x, train_y, valid_x, valid_y = load_mnist() train_x, train_y, valid_x, valid_y = load_cifar10() train_x = np.reshape(train_x, [train_x.shape[0], 3, 32, 32]) valid_x = np.reshape(valid_x, [valid_x.shape[0], 3, 32, 32]) print (train_x.shape) print (train_y.shape) print (valid_x.shape) print (valid_y.shape) print() #Init model print ('Loading model') use_cuda = True# torch.cuda.is_available() n_gpus = 1#2 #torch.cuda.device_count() if n_gpus < 2:
def train(args, cnn=None):
    """Train a classification colourization model (CNN or UNet) on CIFAR-10.

    Colours are predicted as categories over a fixed palette loaded from
    ``args.colours``. Saves per-epoch visualizations (when ``args.plot``),
    a training-curve plot, and optionally a checkpoint.

    Args:
        args: namespace with seed, experiment_name, colours, downsize_input,
            model, kernel, num_filters, learn_rate, gpu, epochs, batch_size,
            plot, visualize, checkpoint attributes.
        cnn: optional pre-built model; built from ``args.model`` when None.

    Returns:
        The trained model.
    """
    # Set the maximum number of threads to prevent crash in Teaching Labs
    torch.set_num_threads(5)
    # Numpy random seed
    np.random.seed(args.seed)
    # Save directory
    save_dir = "outputs/" + args.experiment_name

    # LOAD THE COLOURS CATEGORIES (palette of colour centroids)
    colours = np.load(args.colours, encoding='bytes')[0]
    num_colours = np.shape(colours)[0]
    # INPUT CHANNEL: greyscale (1) unless the input is downsized RGB (3).
    num_in_channels = 1 if not args.downsize_input else 3
    # LOAD THE MODEL
    if cnn is None:
        if args.model == "CNN":
            cnn = CNN(args.kernel, args.num_filters, num_colours,
                      num_in_channels)
        elif args.model == "UNet":
            cnn = UNet(args.kernel, args.num_filters, num_colours,
                       num_in_channels)

    # LOSS FUNCTION
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(cnn.parameters(), lr=args.learn_rate)

    # DATA
    print("Loading data...")
    (x_train, y_train), (x_test, y_test) = load_cifar10()
    print("Transforming data...")
    train_rgb, train_grey = process_cls(x_train, y_train,
                                        downsize_input=args.downsize_input)
    train_rgb_cat = get_rgb_cat(train_rgb, colours)
    test_rgb, test_grey = process_cls(x_test, y_test,
                                      downsize_input=args.downsize_input)
    test_rgb_cat = get_rgb_cat(test_rgb, colours)

    # Create the outputs folder if not created already
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    print("Beginning training ...")
    if args.gpu:
        cnn.cuda()
    start = time.time()

    train_losses = []
    valid_losses = []
    # NOTE(review): valid_accs is collected but not read again in this
    # function — presumably kept for callers/plots elsewhere; confirm.
    valid_accs = []
    for epoch in range(args.epochs):
        # Train the Model
        cnn.train()  # change model to 'train' mode
        losses = []
        for i, (xs, ys) in enumerate(
                get_batch(train_grey, train_rgb_cat, args.batch_size)):
            images, labels = get_torch_vars(xs, ys, args.gpu, False)
            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = cnn(images)
            loss = compute_loss(criterion, outputs, labels,
                                batch_size=args.batch_size,
                                num_colours=num_colours)
            loss.backward()
            optimizer.step()
            losses.append(loss.data.item())

        # plot training images (uses the last mini-batch of the epoch)
        if args.plot:
            _, predicted = torch.max(outputs.data, 1, keepdim=True)
            plot_cls(xs, ys, predicted.cpu().numpy(), colours,
                     save_dir + '/train_%d.png' % epoch, args.visualize,
                     args.downsize_input)

        # plot training images
        avg_loss = np.mean(losses)
        train_losses.append(avg_loss)
        time_elapsed = time.time() - start
        print('Epoch [%d/%d], Loss: %.4f, Time (s): %d' % (
            epoch + 1, args.epochs, avg_loss, time_elapsed))

        # Evaluate the model
        cnn.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
        val_loss, val_acc = run_validation_step(
            cnn, criterion, test_grey, test_rgb_cat, args.batch_size, colours,
            save_dir + '/test_%d.png' % epoch, args.visualize,
            args.downsize_input, args.gpu, False)

        time_elapsed = time.time() - start
        valid_losses.append(val_loss)
        valid_accs.append(val_acc)
        print('Epoch [%d/%d], Val Loss: %.4f, Val Acc: %.1f%%, Time(s): %d' % (
            epoch + 1, args.epochs, val_loss, val_acc, time_elapsed))

    # Plot training curve
    plt.figure()
    plt.plot(train_losses, "ro-", label="Train")
    plt.plot(valid_losses, "go-", label="Validation")
    plt.legend()
    plt.title("Loss")
    plt.xlabel("Epochs")
    plt.savefig(save_dir + "/training_curve.png")

    if args.checkpoint:
        print('Saving model...')
        torch.save(cnn.state_dict(), args.checkpoint)

    return cnn
def main(args):
    """Train MobileNetV2 on CIFAR-10, logging weights/metrics/architecture.

    Bug fixed: the transfer-learning branch printed the undefined name
    ``MODEL_INIT_WEIGHTS_PATH`` (NameError whenever ``args.trans_learn`` was
    set); it now reports the path actually loaded, ``args.weights_path``.

    Args:
        args: namespace with batch_size, epochs, trans_learn, weights_path.
    """
    input_shape = (32, 32, 3)
    num_classes = 10
    batch_size = int(args.batch_size)
    epochs = int(args.epochs)

    # Load cifar10 data
    (X_train, y_train), (X_test, y_test) = load_cifar10()

    # Define model
    model = MobileNetV2(input_shape=input_shape,
                        nb_class=num_classes,
                        include_top=True).build()
    MODEL_NAME = "mobilenetv2__" + datetime.now().strftime("%Y-%m%d-%H%M%S")

    # Path & Env. settings -------------------------------------------------------------
    LOG_DIR = os.path.join("./log", MODEL_NAME)
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    # Snapshot the exact scripts used for this run, for reproducibility.
    shutil.copyfile(os.path.join(os.getcwd(), 'train.sh'),
                    os.path.join(LOG_DIR, 'train.sh'))
    shutil.copyfile(os.path.join(os.getcwd(), 'train.py'),
                    os.path.join(LOG_DIR, 'train.py'))
    shutil.copyfile(os.path.join(os.getcwd(), 'models.py'),
                    os.path.join(LOG_DIR, 'models.py'))
    MODEL_WEIGHT_CKP_PATH = os.path.join(LOG_DIR, "best_weights.h5")
    MODEL_TRAIN_LOG_CSV_PATH = os.path.join(LOG_DIR, "train_log.csv")
    # ----------------------------------------------------------------------------------

    # Compile model
    model.summary()
    model.compile(
        optimizer=keras.optimizers.SGD(lr=2e-2, momentum=0.9, decay=0.0,
                                       nesterov=False),
        loss='categorical_crossentropy',
        loss_weights=[
            1.0
        ],  # The loss weight for model output without regularization loss. Set 0.0 due to validate only regularization factor.
        metrics=['accuracy'])

    # Load initial weights from pre-trained model
    if args.trans_learn:
        model.load_weights(str(args.weights_path), by_name=False)
        # Was the undefined name MODEL_INIT_WEIGHTS_PATH (NameError).
        print("Load model init weights from", args.weights_path)

    print("Produce training results in", LOG_DIR)

    # Piecewise-constant learning-rate schedule over 300 epochs
    # (5 + 45 + 50 + 50 + 50 + 100); replaces six hand-rolled append loops.
    learning_rates = ([2e-2] * 5 + [1e-2] * 45 + [8e-3] * 50 +
                      [4e-3] * 50 + [2e-3] * 50 + [1e-3] * 100)

    # Set model callbacks
    callbacks = []
    callbacks.append(
        ModelCheckpoint(MODEL_WEIGHT_CKP_PATH,
                        monitor='val_loss',
                        save_best_only=True,
                        save_weights_only=True))
    callbacks.append(CSVLogger(MODEL_TRAIN_LOG_CSV_PATH))
    callbacks.append(
        LearningRateScheduler(lambda epoch: float(learning_rates[epoch])))

    # data generator with data augumatation (shifts + horizontal flips only)
    datagen = keras.preprocessing.image.ImageDataGenerator(
        featurewise_center=False,
        featurewise_std_normalization=False,
        rotation_range=0.0,
        width_shift_range=0.2,
        height_shift_range=0.2,
        vertical_flip=False,
        horizontal_flip=True)
    datagen.fit(X_train)

    # Train model
    history = model.fit_generator(datagen.flow(X_train, y_train,
                                               batch_size=batch_size),
                                  steps_per_epoch=len(X_train) / batch_size,
                                  epochs=epochs,
                                  verbose=1,
                                  callbacks=callbacks,
                                  validation_data=(X_test, y_test))

    # Validation
    val_loss, val_acc = model.evaluate(X_test, y_test, verbose=1)
    print("--------------------------------------")
    print("model name : ", MODEL_NAME)
    print("validation loss : {:.5f}".format(val_loss))
    print("validation accuracy : {:.5f}".format(val_acc))

    # Save model as "instance"
    ins_name = 'model_instance'
    ins_path = os.path.join(LOG_DIR, ins_name) + '.h5'
    model.save(ins_path)

    # Save model as "architechture"
    arch_name = 'model_fin_architechture'
    arch_path = os.path.join(LOG_DIR, arch_name) + '.json'
    json_string = model.to_json()
    with open(arch_path, 'w') as f:
        f.write(json_string)
def run_ResNet(dataset, depth, n_epochs, batch_size, lookahead, alpha0,
               experiment_dir, epsilon, random_seed, output_file_base_name,
               gradient_clipping=None, force=False,
               n_validation_resamples=3., n_test_resamples=5.):
    """Train and evaluate a stick-breaking (SB) ResNet with Lasagne/Theano.

    Builds the symbolic graph, optionally resumes from saved parameters and
    a previous results file, trains with Adam + early stopping (``lookahead``
    epochs without validation improvement), then reports resampled test error.

    Python 2 code (print statements, xrange, cPickle).
    NOTE(review): `epsilon` and `random_seed` are never read in this body —
    confirm whether they are consumed elsewhere or vestigial.
    """
    # LOAD DATA
    if "mnist_plus_rot" in dataset:
        datasets = load_mnist_w_rotations(dataset, flatten=False,
                                          split=(70000, 10000, 20000))
        dataset_name = "mnist_w_rotation"
        input_layer = InputLayer(shape=(None, 1, 28, 28))
        output_size = 10
    elif "mnist" in dataset:
        # We follow the approach used in [2] to split the MNIST dataset.
        datasets = load_mnist(dataset, flatten=False,
                              split=(45000, 5000, 10000))
        dataset_name = "mnist"
        input_layer = InputLayer(shape=(None, 1, 28, 28))
        output_size = 10
    elif "cifar10" in dataset:
        # We split the Cifar-10 dataset according to [2].
        datasets = load_cifar10(dataset, flatten=False,
                                split=(45000, 5000, 10000))
        dataset_name = "cifar10"
        input_layer = InputLayer(shape=(None, 3, 32, 32))
        output_size = 10

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Dataset sizes: the shapes are symbolic, so .eval() gets concrete ints.
    train_set_size = int(train_set_y.shape[0].eval())
    valid_set_size = int(valid_set_y.shape[0].eval())
    test_set_size = int(test_set_y.shape[0].eval())
    print 'Dataset {} loaded ({:,}|{:,}|{:,})'.format(dataset_name,
                                                      train_set_size,
                                                      valid_set_size,
                                                      test_set_size)

    # compute number of minibatches for training, validation and testing
    n_train_batches = int(np.ceil(train_set_size / batch_size))
    n_valid_batches = int(np.ceil(valid_set_size / batch_size))
    n_test_batches = int(np.ceil(test_set_size / batch_size))

    # BUILD MODEL
    print 'Building the model ...'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    index.tag.test_value = 0
    # epoch = T.scalar()
    x = T.tensor4('x')  # the data is presented as rasterized images
    y = T.vector('y')  # the labels are presented as 1D vector of [floatX] labels.
    # Test values are useful for debugging with THEANO_FLAGS="compute_test_value=warn"
    x.tag.test_value = train_set_x[:batch_size].eval()
    y.tag.test_value = train_set_y[:batch_size].eval()

    input_layer.input_var = x
    # Residual blocks are split into 3 phases; each phase has (depth-2)//9
    # units of 3 layers. Used only for reporting below.
    layers_per_phase = ((depth - 2) // 9) * 3
    network, infos = build_sb_resnet(input_layer, depth, output_size)
    print "Number of parameters in model: {:,}".format(
        lasagne.layers.count_params(network, trainable=True))

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    ll_term = lasagne.objectives.categorical_crossentropy(
        prediction, T.cast(y, dtype="int32"))
    # One KL penalty per phase, against a Beta(1, alpha0) prior.
    kl_term_1 = calc_kl_divergence(infos[0], alpha=1., beta=alpha0)
    kl_term_2 = calc_kl_divergence(infos[1], alpha=1., beta=alpha0)
    kl_term_3 = calc_kl_divergence(infos[2], alpha=1., beta=alpha0)
    kl_term = kl_term_1 + kl_term_2 + kl_term_3
    cost = T.mean(ll_term + kl_term)

    # Compute average number of layers that have a stick length >= 1% in each phase.
    avg_n_layers_phase1 = calc_avg_n_layers(infos[0])
    avg_n_layers_phase2 = calc_avg_n_layers(infos[1])
    avg_n_layers_phase3 = calc_avg_n_layers(infos[2])

    avg_kl_term_1 = T.mean(kl_term_1)
    avg_kl_term_2 = T.mean(kl_term_2)
    avg_kl_term_3 = T.mean(kl_term_3)

    # Build the expresson for the cost function.
    params = lasagne.layers.get_all_params(network, trainable=True)

    # If params already exist and 'force' is False, reload parameters.
    params_pkl_filename = pjoin(
        experiment_dir, 'conv_sb-resnet_params_' + output_file_base_name + '.pkl')
    print "Checking if '{}' already exists.".format(params_pkl_filename)
    if os.path.isfile(params_pkl_filename) and not force:
        print "Yes! Reloading existing parameters and resuming training (use --force to overwrite)."
        last_params = cPickle.load(open(params_pkl_filename, 'rb'))
        for param, last_param in zip(params, last_params):
            param.set_value(last_param)
    elif force:
        print "Yes! but --force was used. Starting from scratch."
    else:
        print "No! Starting from scratch."

    gradients = dict(zip(params, T.grad(cost, params)))
    if gradient_clipping is not None:
        # Global-norm clipping: rescale all gradients when the joint norm
        # exceeds the threshold.
        grad_norm = T.sqrt(
            sum(map(lambda d: T.sqr(d).sum(), gradients.values())))
        # Note that rescaling is one if grad_norm <= threshold.
        rescaling = gradient_clipping / T.maximum(grad_norm, gradient_clipping)
        new_gradients = OrderedDict()
        for param, gparam in gradients.items():
            gparam_clipped = gparam * rescaling
            new_gradients[param] = gparam_clipped
        gradients = new_gradients

    updates = utils.get_adam_updates_from_gradients(gradients)

    # Compile theano function for training. This updates the model parameters and
    # returns the training nll term, kl term, and the avg. nb. of layers used in each phase.
    print 'Compiling train function ...'
    compiling_start = time.time()
    train_model = theano.function(
        inputs=[index],
        outputs=[
            ll_term.mean(), kl_term.mean(), avg_n_layers_phase1,
            avg_n_layers_phase2, avg_n_layers_phase3, avg_kl_term_1,
            avg_kl_term_2, avg_kl_term_3
        ],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    print "{:.2f}".format((time.time() - compiling_start) / 60.)

    # Create a loss expression for validation/testing
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, T.cast(y, dtype="int32"))
    test_loss = test_loss.mean()
    # Count of misclassified examples in the batch.
    test_error = T.sum(T.neq(T.argmax(test_prediction, axis=1), y),
                       dtype=theano.config.floatX)

    print 'Compiling valid function ...'
    compiling_start = time.time()
    valid_model = theano.function(
        inputs=[index],
        outputs=[test_loss, test_error],
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })
    print "{:.2f}".format((time.time() - compiling_start) / 60.)

    print 'Compiling test function ...'
    compiling_start = time.time()
    test_model = theano.function(
        inputs=[index],
        outputs=[test_loss, test_error],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })
    print "{:.2f}".format((time.time() - compiling_start) / 60.)

    ###############
    # TRAIN MODEL #
    ###############
    print 'Training for {} epochs ...'.format(n_epochs)

    best_params = None
    best_valid_error = np.inf
    best_iter = 0
    start_time = time.clock()

    # Resume epoch counting from an existing results file unless --force.
    results_filename = pjoin(
        experiment_dir, "conv_sb-resnet_results_" + output_file_base_name + ".txt")
    if os.path.isfile(results_filename) and not force:
        # Parse the last logged "epoch N, ..." line to find where to resume.
        last_result = open(results_filename, 'rb').readlines()[-1]
        idx_start = len("epoch ")
        idx_end = last_result.find(",", idx_start + 1)
        start_epoch = int(last_result[idx_start:idx_end]) + 1
        results_file = open(results_filename, 'ab')
    else:
        start_epoch = 0
        results_file = open(results_filename, 'wb')

    stop_training = False
    for epoch_counter in range(start_epoch, n_epochs):
        if stop_training:
            break

        # Train this epoch
        epoch_start_time = time.time()
        avg_training_loss_tracker = 0.
        avg_training_kl_tracker = 0.
        avg_n_layers_phase1_tracker = 0.
        avg_n_layers_phase2_tracker = 0.
        avg_n_layers_phase3_tracker = 0.
        avg_kl_term_1_tracker = 0.
        avg_kl_term_2_tracker = 0.
        avg_kl_term_3_tracker = 0.
        for minibatch_index in xrange(n_train_batches):
            avg_training_loss, avg_training_kl, avg_n_layers_phase1, avg_n_layers_phase2, avg_n_layers_phase3, avg_kl_term_1, avg_kl_term_2, avg_kl_term_3 = train_model(
                minibatch_index)

            # Per-batch progress report (the % 1 makes it every batch).
            if minibatch_index % 1 == 0:
                results = "batch #{}-{}, avg n_layers per phase ({:.2f}|{:.2f}|{:.2f})/{}, training loss (nll) {:.4f}, training kl-div {:.4f} ({:.4f}|{:.4f}|{:.4f}), time {:.2f}m"
                results = results.format(epoch_counter, minibatch_index,
                                         float(avg_n_layers_phase1),
                                         float(avg_n_layers_phase2),
                                         float(avg_n_layers_phase3),
                                         layers_per_phase,
                                         float(avg_training_loss),
                                         float(avg_training_kl),
                                         float(avg_kl_term_1),
                                         float(avg_kl_term_2),
                                         float(avg_kl_term_3),
                                         (time.time() - epoch_start_time) / 60.)
                print results

            # Abort the whole run on divergence.
            if np.isnan(avg_training_loss):
                msg = "NaN detected! Stopping."
                print msg
                results_file.write(msg + "\n")
                results_file.flush()
                sys.exit(1)

            avg_training_loss_tracker += avg_training_loss
            avg_training_kl_tracker += avg_training_kl
            avg_n_layers_phase1_tracker += avg_n_layers_phase1
            avg_n_layers_phase2_tracker += avg_n_layers_phase2
            avg_n_layers_phase3_tracker += avg_n_layers_phase3
            avg_kl_term_1_tracker += avg_kl_term_1
            avg_kl_term_2_tracker += avg_kl_term_2
            avg_kl_term_3_tracker += avg_kl_term_3

        epoch_end_time = time.time()

        # Compute some infos about training.
        avg_training_loss_tracker /= n_train_batches
        avg_training_kl_tracker /= n_train_batches
        avg_n_layers_phase1_tracker /= n_train_batches
        avg_n_layers_phase2_tracker /= n_train_batches
        avg_n_layers_phase3_tracker /= n_train_batches
        avg_kl_term_1_tracker /= n_train_batches
        avg_kl_term_2_tracker /= n_train_batches
        avg_kl_term_3_tracker /= n_train_batches

        # Compute validation error --- sample multiple times to simulate posterior predictive distribution
        valid_errors = np.zeros((n_valid_batches, ))
        valid_loss = np.zeros((n_valid_batches, ))
        for idx in xrange(int(n_validation_resamples)):
            temp_valid_loss, temp_valid_errors = zip(
                *[valid_model(i) for i in xrange(n_valid_batches)])
            valid_errors += temp_valid_errors
            valid_loss += temp_valid_loss

        valid_loss = np.sum(
            valid_loss / n_validation_resamples) / n_valid_batches
        valid_nb_errors = np.sum(valid_errors / n_validation_resamples)
        valid_error = valid_nb_errors / valid_set_size

        results = (
            "epoch {}, avg n_layers per phase ({:.2f}|{:.2f}|{:.2f})/{}, train loss (nll) {:.4f}, "
            "train kl-div {:.4f}, train kl-div per phase ({:.4f}|{:.4f}|{:.4f}), "
            "valid loss {:.4f}, valid error {:.2%} ({:,}), time {:.2f}m")

        # Early stopping: keep the best parameters, stop after `lookahead`
        # epochs without improvement.
        if valid_error < best_valid_error:
            best_iter = epoch_counter
            best_valid_error = valid_error
            results += " **"
            # Save progression
            best_params = [param.get_value().copy() for param in params]
            cPickle.dump(best_params, open(params_pkl_filename, 'wb'),
                         protocol=cPickle.HIGHEST_PROTOCOL)
        elif epoch_counter - best_iter > lookahead:
            stop_training = True

        # Report and save progress.
        results = results.format(epoch_counter, avg_n_layers_phase1_tracker,
                                 avg_n_layers_phase2_tracker,
                                 avg_n_layers_phase3_tracker, layers_per_phase,
                                 avg_training_loss_tracker,
                                 avg_training_kl_tracker, avg_kl_term_1_tracker,
                                 avg_kl_term_2_tracker, avg_kl_term_3_tracker,
                                 valid_loss, valid_error, valid_nb_errors,
                                 (epoch_end_time - epoch_start_time) / 60)
        print results
        results_file.write(results + "\n")
        results_file.flush()

    end_time = time.clock()

    # Reload best model.
    for param, best_param in zip(params, best_params):
        param.set_value(best_param)

    # Compute test error --- sample multiple times to simulate posterior predictive distribution
    test_errors = np.zeros((n_test_batches, ))
    test_loss = np.zeros((n_test_batches, ))
    for idx in xrange(int(n_test_resamples)):
        temp_test_loss, temp_test_errors = zip(
            *[test_model(i) for i in xrange(n_test_batches)])
        test_errors += temp_test_errors
        test_loss += temp_test_loss

    test_loss = np.sum(test_loss / n_test_resamples) / n_test_batches
    test_nb_errors = np.sum(test_errors / n_test_resamples)
    test_error = test_nb_errors / test_set_size

    results = "Done! best epoch {}, test loss {:.4f}, test error {:.2%} ({:,}), training time {:.2f}m"
    results = results.format(best_iter, test_loss, test_error, test_nb_errors,
                             (end_time - start_time) / 60)
    print results
    results_file.write(results + "\n")
    results_file.close()

    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
from keras.models import Model from keras.layers.normalization import BatchNormalization from keras.layers import Conv2D, MaxPooling2D from keras.layers.advanced_activations import LeakyReLU import load_data from keras.models import load_model import numpy as np import os def generated_images(data): model = load_model('./models_vaegan/1980_autoencoder.h5') generated_images = model.predict(data) return generated_images x_train_public, y_train_public, x_test_public, y_test_public,\ x_train_secret, y_train_secret, x_test_secret, y_test_secret = load_data.load_cifar10() x_train_public_generated = generated_images(x_train_public) x_test_public_generated = generated_images(x_test_public) x_train_secret_generated = generated_images(x_train_secret) x_test_secret_generated = generated_images(x_test_secret) def cnn_model(): d0 = Input((x_train_public.shape[1:])) # x0 = Dense(img_rows*img_cols*1, activation = 'relu')(d0) # x0 = Reshape((img_rows,img_cols,1))(x0) x = Conv2D(32, (5, 5), padding='same', name='id_conv1')(d0) x = LeakyReLU(0.2)(x) x = BatchNormalization()(x) x = MaxPooling2D((3, 3), padding='same', strides=(2, 2))(x)