def test(depth, p, dataset, num_epochs=200, seed=None):
    """Train a network to reproduce its own input and return the losses.

    The first 90% of the rows are used for training and the remaining 10%
    for validation. In both iterators the same array is passed as input and
    as target. ``batch_size`` is read from module scope.

    Args:
        depth: Number of hidden layers (each has 1000 units).
        p: Forwarded unchanged to ``Network`` as ``p``
            (presumably a dropout probability -- TODO confirm).
        dataset: Either ``"mnist"`` or ``"cifar10"``.
        num_epochs: Number of epochs forwarded to ``net.train``.
        seed: Seed for NumPy's global RNG; ``None`` is treated as ``0``.

    Returns:
        Whatever ``net.train`` returns.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    if seed is None:
        seed = 0
    np.random.seed(seed)

    if dataset == "mnist":
        data = mnist.load().astype(np.float32)
    elif dataset == "cifar10":
        data = cifar10.load().astype(np.float32)
    else:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            "unknown dataset %r (expected 'mnist' or 'cifar10')" % (dataset,))

    num_observations, input_dim = data.shape
    data_split_index = int(num_observations * 0.9)
    train_rows = data[:data_split_index]
    validation_rows = data[data_split_index:]
    training_data_iterator = DataIterator(batch_size, train_rows, train_rows)
    validation_data_iterator = DataIterator(batch_size, validation_rows,
                                            validation_rows)

    # make net
    net = Network(input_dim, input_dim, hidden_layers=([1000] * depth), p=p)
    try:
        losses = net.train(training_data_iterator, validation_data_iterator,
                           num_epochs=num_epochs)
    finally:
        # Release the network's resources even when training fails.
        net.close()
    return losses
def create_train_test_split(digits, n_testimages, transpose_trainig=True, transpose_test=False):
    """Collect train and test images for the two digits in ``digits``.

    For each of ``digits[0]`` and ``digits[1]`` the images are selected with
    ``pick_digit_from_data``; each test selection is cut to its first
    ``n_testimages`` entries. The training and/or test arrays are transposed
    when the corresponding flag is true.

    Returns:
        ``(d1_train, d1_test, d2_train, d2_test)``.
    """
    x_train, y_train, x_test, y_test = mnist.load()

    def images_for(digit):
        # Training images first, then the truncated test images.
        train_images = pick_digit_from_data(x_train, y_train, digit)
        test_images = pick_digit_from_data(x_test, y_test, digit)[0:n_testimages]
        return train_images, test_images

    d1_train, d1_test = images_for(digits[0])
    d2_train, d2_test = images_for(digits[1])

    if transpose_trainig:
        d1_train, d2_train = d1_train.T, d2_train.T
    if transpose_test:
        d1_test, d2_test = d1_test.T, d2_test.T

    return d1_train, d1_test, d2_train, d2_test
def process_data(task, train_size=60000, test_size=10000, val_perc=0.1):
    '''
    Creates the datasets to be used in the logistic regression task.

    Loads MNIST, partitions it to the requested sizes, removes every digit
    except 2 and 3, splits off a validation set, then runs each split
    through ``normalize_data``, ``append_ones`` and ``binary_class`` (with
    digit 2 as the true class).

    Args:
        task: Must be 'logistic_regression' (the only supported task).
        train_size: Forwarded to ``partition_dataset``.
        test_size: Forwarded to ``partition_dataset``.
        val_perc: Forwarded to ``create_validation_set``.

    Returns:
        (train_data, val_data, test_data)

    Raises:
        ValueError: If ``task`` is not supported. Previously this surfaced
            as an UnboundLocalError further down.
    '''
    if task != 'logistic_regression':
        raise ValueError("unsupported task: %r" % (task,))

    excluded = [0, 1, 4, 5, 6, 7, 8, 9]
    true_class = 2

    X_train, Y_train, X_test, Y_test = mnist.load()
    train_data, test_data = (X_train, Y_train), (X_test, Y_test)
    train_data, test_data = partition_dataset(train_data, test_data,
                                              train_size, test_size)

    for digit in excluded:
        train_data, test_data = remove_digit(train_data, test_data, digit)

    train_data, val_data = create_validation_set(train_data, val_perc)

    # Apply each stage to train, val, test in turn, in the same call order
    # as the original step-by-step code.
    splits = [train_data, val_data, test_data]
    for stage in (normalize_data, append_ones):
        splits = [stage(split) for split in splits]
    splits = [binary_class(split, true_class) for split in splits]

    train_data, val_data, test_data = splits
    return train_data, val_data, test_data
def load_binary_dataset(class1: int, class2: int):
    """
    Loads MNIST, prunes it to two classes and returns train/validation data.

    The first 20000 examples of the first split and the first 10000 examples
    of the second split returned by ``mnist.load()`` are passed through
    ``binary_prune_dataset``; the labels are then reshaped to (N, 1).
    """
    train_size = 20000
    val_size = 10000

    X_train, Y_train, X_val, Y_val = mnist.load()

    def prune_head(images, labels, count):
        # Keep the leading `count` examples, prune to the two classes,
        # and return the labels as a column vector.
        images, labels = binary_prune_dataset(
            class1, class2, images[:count], labels[:count]
        )
        return images, labels.reshape(-1, 1)

    X_train, Y_train = prune_head(X_train, Y_train, train_size)
    X_val, Y_val = prune_head(X_val, Y_val, val_size)

    print(f"Train shape: X: {X_train.shape}, Y: {Y_train.shape}")
    print(f"Validation shape: X: {X_val.shape}, Y: {Y_val.shape}")

    return X_train, Y_train, X_val, Y_val
def get_mnist():
    """
    Load the MNIST data

    Returns:
        (x_train, t_train, x_test, t_test) exactly as ``mnist.load()``
        returns them.
    """
    import os

    # NOTE(review): assumes mnist.init() is the download step that writes
    # "mnist.pkl" into the working directory (MNIST-for-Numpy layout) --
    # confirm. Skipping it when the cache exists avoids re-downloading on
    # every call.
    if not os.path.isfile("mnist.pkl"):
        mnist.init()

    x_train, t_train, x_test, t_test = mnist.load()
    print("Loaded MNIST data")

    return x_train, t_train, x_test, t_test
def main():
    """Train a [784, 36, 10] MLP on MNIST with ``net.sgd``.

    The validation split returned by ``mnist.load()`` is not used; the test
    split is passed to ``sgd`` as its fourth positional argument.
    """
    training_data, _validation_data, test_data = mnist.load()
    net = mlp.MLP([784, 36, 10])

    # Hyper-parameters, in the positional order sgd() receives them.
    epochs, mini_batch_size = 500, 10
    learning_rate, lmbda, drop_prob = 0.5, 5.0, 0.5

    net.sgd(training_data, epochs, mini_batch_size, test_data,
            learning_rate, lmbda, drop_prob)
def main():
    """Load MNIST, build a 784-36-10 MLP and run its ``sgd`` training.

    Only the training and test splits are used; the validation split is
    unpacked and ignored.
    """
    splits = mnist.load()
    training_data, _unused_validation, test_data = splits

    layer_sizes = [784, 36, 10]
    net = mlp.MLP(layer_sizes)

    # Positional arguments for sgd(): training data, epochs, mini-batch
    # size, test data, learning rate, lmbda, drop probability.
    sgd_args = (training_data, 500, 10, test_data, 0.5, 5.0, 0.5)
    net.sgd(*sgd_args)
def load_data():
    """Load MNIST as a dict of image arrays and one-hot labels.

    Images are reshaped to ``(N, 1, 28, 28)``. ``N`` is inferred from the
    data instead of being hard-coded to 60000/10000, so subsets of the
    dataset load as well. Labels are passed through ``one_hot``.

    Returns:
        dict with keys ``"training_images"``, ``"training_labels"``,
        ``"test_images"`` and ``"test_labels"``.
    """
    train_images, train_labels, test_images, test_labels = load()

    # A descriptive local name avoids shadowing a module called `mnist`.
    dataset = {
        "training_images": train_images.reshape((-1, 1, 28, 28)),
        "training_labels": one_hot(train_labels),
        "test_images": test_images.reshape((-1, 1, 28, 28)),
        "test_labels": one_hot(test_labels),
    }
    return dataset
def get_mnist(num_train):
    """Return one-hot MNIST data split into train, validation and test.

    The training set is shuffled and cut to its first ``num_train``
    examples. Labels are one-hot encoded with 10 classes. The reduced
    training set is then split with ``train_test_split(test_size=.3)``.

    Returns:
        ``(x_train, y_train, x_val, y_val, x_test, y_test)``.
    """
    x_train, y_train, x_test, y_test = mnist.load()
    x_train, y_train = shuffle(x_train, y_train)

    subset_x = x_train[:num_train]
    subset_y = one_hot(y_train[:num_train], 10)
    y_test = one_hot(y_test, 10)

    x_train, x_val, y_train, y_val = train_test_split(
        subset_x, subset_y, test_size=.3)
    return x_train, y_train, x_val, y_val, x_test, y_test
def test_feedforward_network_n2048(benchmark):
    """Benchmark SGD training of a [784, 2048, 10] feedforward network.

    The module-level ``training_data`` is loaded on first use (when it is
    falsy) and reused afterwards.
    """
    global training_data
    if not training_data:
        training_data = mnist.load("train")

    net = feedforward.Network([28 * 28, 2048, 10])

    def train():
        net.stochastic_gradient_descent(
            training_data,
            1000,
            epochs=10,
            mini_batch_size=10,
            learning_rate=3.0,
        )

    # Same effect as decorating `train` with @benchmark.
    benchmark(train)
def _two_digit_subset(images, labels, digit1, digit2, count):
    """Sample ``count`` examples of two digits, relabelled to -1 / +1."""
    # Vectorised replacement for the Python-level enumerate() scan.
    selected = np.flatnonzero((labels == digit1) | (labels == digit2))
    x = images[selected, :]
    # np.cast was removed in NumPy 2.0; astype(int) is the equivalent.
    y = labels[selected].astype(int)
    y[y == digit1] = -1
    y[y == digit2] = 1
    # np.random.choice samples with replacement by default, so `count` may
    # exceed the number of available examples and duplicates can occur.
    picks = np.random.choice(np.arange(y.size), count)
    return x[picks, :], y[picks]


def init(digit1, digit2, num_train, num_test):
    """Build a two-class (-1 / +1) MNIST problem from two digits.

    Args:
        digit1: Digit relabelled to -1.
        digit2: Digit relabelled to +1.
        num_train: Number of training examples to sample; ``<= 0`` yields
            ``None`` for both training outputs.
        num_test: Number of test examples to sample; ``<= 0`` yields
            ``None`` for both test outputs.

    Returns:
        ``(x, y, xtest, ytest)``.
    """
    # Download dataset
    if not os.path.isfile("mnist.pkl"):
        mnist.init()

    # Load whole dataset into memory
    x_train, t_train, x_test, t_test = mnist.load()

    # Subset training data
    if num_train > 0:
        x, y = _two_digit_subset(x_train, t_train, digit1, digit2, num_train)
    else:
        x, y = None, None

    # Subset test data
    if num_test > 0:
        xtest, ytest = _two_digit_subset(x_test, t_test, digit1, digit2,
                                         num_test)
    else:
        xtest, ytest = None, None

    return (x, y, xtest, ytest)
def format():
    """Group MNIST images by label and scale them by 1/255.

    Returns:
        ``(trainlabeled, testlabeled)``: each is a list of 10 lists, where
        list ``d`` holds the images whose label is ``d``, each divided
        by 255.
    """
    traindata, trainlabels, testdata, testlabels = mnist.load()

    def bucket_by_label(images, labels):
        # label data and scale from (0,255) to (0,1)
        labels = list(labels)
        buckets = [[] for _ in range(10)]
        for position, image in enumerate(images):
            buckets[labels[position]].append(image / 255)
        return buckets

    trainlabeled = bucket_by_label(traindata, trainlabels)
    testlabeled = bucket_by_label(testdata, testlabels)
    return trainlabeled, testlabeled
def main():
    """Compare clustering precision on PCA and PCA-GM projections of MNIST.

    Command line: ``python3 clustering.py [n] [p]`` where ``n`` is the
    number of projection directions and ``p`` is forwarded to
    ``optimization.gen_mean`` / ``optimization.PCAGM``.

    A 360 x 784 data matrix is built from the training set: the first 300
    rows are the first 300 images labelled 3, 8 or 9, and the remaining 60
    rows are the next images with any other label. Every row is divided by
    its L1 norm. Both projections are passed to ``make_clustering`` with
    3 clusters and the returned values are printed.
    """
    # Both arguments are required; previously a single argument slipped
    # past the guard and crashed with IndexError on sys.argv[2].
    if len(sys.argv) < 3:
        print("Ejecuta: python3 clustering.py [n] [p].")
        return

    n_proj = int(sys.argv[1])
    p = float(sys.argv[2])
    x_train, t_train, _, _ = mnist.load()

    n, m = 360, 784
    data = np.zeros((n, m), dtype='float')
    labels = np.zeros((n, 1), dtype='int')

    def is_target(digit):
        return digit == 3 or digit == 8 or digit == 9

    # Fill rows [0, 300) with target digits, then rows [300, n) with
    # non-target digits. `idx` keeps advancing across both phases.
    cnt, idx = 0, 0
    for row_limit, want_target in ((300, True), (n, False)):
        while cnt < row_limit:
            if bool(is_target(t_train[idx])) == want_target:
                data[cnt, :] = x_train[idx, :] / LA.norm(x_train[idx, :], 1)
                labels[cnt] = t_train[idx]
                cnt += 1
            idx += 1

    W_PCA = optimization.PCA(data, n_proj)
    data_proj_PCA = data.dot(W_PCA)

    # Per-column mean, computed column by column as before.
    data_mean = np.zeros((data.shape[1]), dtype='float')
    for i in range(data.shape[1]):
        data_mean[i] = np.mean(data[:, i])

    gm = optimization.gen_mean(data, data_mean, p)
    _, W_PCAGM = optimization.PCAGM(data, gm, n_proj, W_PCA, p)
    data_proj_PCAGM = data.dot(W_PCAGM)

    k = 3  # number of clusters (one per target digit)
    prec_1 = make_clustering(data_proj_PCA, labels, k)
    prec_2 = make_clustering(data_proj_PCAGM, labels, k)
    print("Precision de la clasificacion (PCA): ", prec_1)
    print("Precision de la clasificacion (PCA GM): ", prec_2)
def load_full_mnist():
    """
    Loads MNIST and returns a training subset plus a validation set.

    Keeps the first 20000 examples of the first split and the last 10000
    examples of the second split returned by ``mnist.load()``. Labels are
    returned with shape (N, 1).
    """
    train_size = 20000
    test_size = 10000

    X_train, Y_train, X_val, Y_val = mnist.load()

    # Leading `train_size` training examples; labels as a column vector.
    X_train = X_train[:train_size]
    Y_train = Y_train[:train_size].reshape(-1, 1)

    # Trailing `test_size` examples of the second split; labels as a column.
    X_val = X_val[-test_size:]
    Y_val = Y_val[-test_size:].reshape(-1, 1)

    print(f"Train shape: X: {X_train.shape}, Y: {Y_train.shape}")
    print(f"Validation shape: X: {X_val.shape}, Y: {Y_val.shape}")

    return X_train, Y_train, X_val, Y_val
def load_split_tasks(n):
    """Split MNIST into tasks of ``n`` classes each.

    The data is loaded unshuffled with 3000 training and 1000 validation
    examples per class and sliced into 10 consecutive per-class chunks
    (assumes ``mnist.load(shuffle=False, ...)`` returns examples grouped by
    class in that order -- TODO confirm). The chunks are shuffled with
    NumPy's global RNG and grouped ``n`` at a time. A trailing group with
    fewer than ``n`` chunks is dropped.

    Args:
        n: Number of classes per task.

    Returns:
        List of ``(train_array, val_array)`` tuples, one per task.
    """
    tc = 3000
    vc = 1000
    nclass = 10

    ds, vds = mnist.load(shuffle=False, train_count=tc, val_count=vc)

    sds = [(ds[tc * c:tc * (c + 1)], vds[vc * c:vc * (c + 1)])
           for c in range(nclass)]
    np.random.shuffle(sds)

    tasks = []
    for g in range(0, nclass, n):
        cds = sds[g:g + n]
        if len(cds) != n:
            # Incomplete trailing group: skip it.
            continue
        ct, cv = zip(*cds)
        ct = np.array([i for j in ct for i in j])
        cv = np.array([i for j in cv for i in j])
        # Same text as the old `print a, b` statement, valid on 2 and 3.
        print("%s %s" % (ct.shape, cv.shape))
        tasks.append((ct, cv))

    # The former trailing `x, y = map(np.array, zip(*ct))` was dead code and
    # raised NameError when no full group existed (n > nclass); removed.
    return tasks
def total_mmv():
    """Compute mean, median, variance and std of the MNIST training images.

    Every training image is divided by 255 first. The statistics are taken
    along axis 0, i.e. across images.

    Returns:
        dict with keys ``'mean'``, ``'median'``, ``'var'`` and ``'std'``.
    """
    traindata, _, _, _ = mnist.load()
    scale = [image / 255 for image in traindata]

    return {
        'mean': np.mean(scale, axis=0),
        'median': np.median(scale, axis=0),
        'var': np.var(scale, axis=0),
        'std': np.std(scale, axis=0),
    }
def create_train_test_split(digits, n_testimages, transpose_trainig=True, transpose_test=False):
    """Return train/test image sets for ``digits[0]`` and ``digits[1]``.

    Images are selected with ``pick_digit_from_data``. Only the first
    ``n_testimages`` test images per digit are kept. ``transpose_trainig``
    and ``transpose_test`` control whether the training and test arrays are
    returned transposed.

    Returns:
        ``(d1_train, d1_test, d2_train, d2_test)``.
    """
    x_train, y_train, x_test, y_test = mnist.load()

    train_sets = []
    test_sets = []
    for position in (0, 1):
        digit = digits[position]
        selected_train = pick_digit_from_data(x_train, y_train, digit)
        selected_test = pick_digit_from_data(x_test, y_test, digit)
        train_sets.append(selected_train)
        test_sets.append(selected_test[0:n_testimages])

    if transpose_trainig:
        train_sets = [images.T for images in train_sets]
    if transpose_test:
        test_sets = [images.T for images in test_sets]

    d1_train, d2_train = train_sets
    d1_test, d2_test = test_sets
    return d1_train, d1_test, d2_train, d2_test
def load_full_mnist(val_percentage: float):
    """
    Loads MNIST and returns train, validation and test sets.

    Keeps the first 20000 training examples and the last 2000 test
    examples, reshapes the labels to (N, 1), then splits the training
    subset with ``train_val_split(X_train, Y_train, val_percentage)``.
    """
    train_size = 20000
    test_size = 2000

    X_train, Y_train, X_test, Y_test = mnist.load()

    # Leading training examples; labels as a column vector.
    X_train = X_train[:train_size]
    Y_train = Y_train[:train_size].reshape(-1, 1)

    # Trailing test examples; labels as a column vector.
    X_test = X_test[-test_size:]
    Y_test = Y_test[-test_size:].reshape(-1, 1)

    X_train, Y_train, X_val, Y_val = train_val_split(
        X_train, Y_train, val_percentage)

    print(f"Train shape: X: {X_train.shape}, Y: {Y_train.shape}")
    print(f"Validation shape: X: {X_val.shape}, Y: {Y_val.shape}")
    print(f"Test shape: X: {X_test.shape}, Y: {Y_test.shape}")

    return X_train, Y_train, X_val, Y_val, X_test, Y_test
def load_binary_dataset(class1: int, class2: int, val_percentage: float):
    """
    Loads MNIST, prunes it to two classes and returns train/val/test sets.

    The first 20000 training examples and the last 2000 test examples are
    passed through ``binary_prune_dataset``. Labels are reshaped to (N, 1)
    and the training part is split with ``train_val_split``.
    """
    train_size = 20000
    test_size = 2000

    X_train, Y_train, X_test, Y_test = mnist.load()

    # Slice and prune in one step: training head first, then test tail.
    X_train, Y_train = binary_prune_dataset(
        class1, class2, X_train[:train_size], Y_train[:train_size])
    X_test, Y_test = binary_prune_dataset(
        class1, class2, X_test[-test_size:], Y_test[-test_size:])

    # Labels as column vectors (N, 1).
    Y_train, Y_test = Y_train.reshape(-1, 1), Y_test.reshape(-1, 1)

    X_train, Y_train, X_val, Y_val = train_val_split(
        X_train, Y_train, val_percentage)

    print(f"Train shape: X: {X_train.shape}, Y: {Y_train.shape}")
    print(f"Validation shape: X: {X_val.shape}, Y: {Y_val.shape}")
    print(f"Test shape: X: {X_test.shape}, Y: {Y_test.shape}")

    return X_train, Y_train, X_val, Y_val, X_test, Y_test
def mnist_preprocess(data): data['data'] /= 255. return data # Logger setup logger = Logger('MNIST AE', train_log_mode='TRAIN_LOSS_ONLY', test_log_mode='TEST_LOSS_ONLY') # Configure GPU Device if args.gpu >= 0: cuda.check_cuda_available() xp = cuda.cupy if args.gpu >= 0 else np # loading dataset dataset = mnist.load() dim = dataset['train']['data'][0].size N_train = len(dataset['train']['target']) N_test = len(dataset['test']['target']) train_data_dict = {'data':dataset['train']['data'].reshape(N_train, dim).astype(np.float32)} test_data_dict = {'data':dataset['test']['data'].reshape(N_test, dim).astype(np.float32)} train_data = DataFeeder(train_data_dict, batchsize=args.batch) test_data = DataFeeder(test_data_dict, batchsize=args.valbatch) train_data.hook_preprocess(mnist_preprocess) test_data.hook_preprocess(mnist_preprocess) # Model Setup h_units = 1200 model = models.AutoencoderModel(
import mnist
import numpy as np
import one_hot_encoding as ohe
import matplotlib.pyplot as plt
import pickle

# Data settings
training_data_size = 55000
validation_data_size = 4000
testing_data_size = 1000

# Loading data from MNIST dataset
X_train, Y_train, X_test, Y_test = mnist.load()

# Reducing values to between 0 - 1
X_train = X_train/255
X_test = X_test/255

# Performing the "bias trick": append a constant column of ones to every
# example. The row count is taken from the data itself rather than being
# hard-coded to 60000 / 10000, so reduced datasets work too.
X_train = np.concatenate((X_train, np.ones([X_train.shape[0], 1])), axis=1)
X_test = np.concatenate((X_test, np.ones([X_test.shape[0], 1])), axis=1)

# Selecting training data and validation data:
#   training   = rows [0, training_data_size)
#   validation = the next validation_data_size rows of the training set
#   testing    = the last testing_data_size rows of the test set
training_data_input = X_train[0:training_data_size,:].copy()
training_data_output = Y_train[0:training_data_size].copy()
validation_data_input = X_train[training_data_size:training_data_size+validation_data_size].copy()
validation_data_output = Y_train[training_data_size:training_data_size+validation_data_size].copy()
testing_data_input = X_test[-testing_data_size:].copy()
testing_data_output = Y_test[-testing_data_size:].copy()

# One hot encode the wanted results
# Goal: use the weights/biases learned in rbm_matlab_mnist_general.py to get
# two things:
#   1. the low dimensional representation of each example
#   2. the reconstruction of each example
# This is basically a translation of the non-backprop part of backprop.m.
import numpy as np
import mnist
from rbm_matlab_mnist_general import random_mini_batches
from pylab import imshow, cm, show
import os

## load mnist data, make batches
# read data
x_train, t_train, x_test, t_test = mnist.load()

# scale data
x_train = x_train / 255
x_test = x_test / 255

# batch input data
batchdata = random_mini_batches(
    x_train, mini_batch_size=100)  # list of batches of input data
numbatches = len(batchdata)

## load weights
# expanduser('~') honours $HOME when it is set and still resolves a home
# directory when it is not; os.getenv('HOME') returned None in that case and
# the string concatenation below it raised TypeError.
home = os.path.expanduser('~')
weights_dir = os.path.join(home, 'Deep_Learning_Examples', 'RBM')
vishid = np.load(os.path.join(weights_dir, 'vishid.npy'))
hidrecbiases = np.load(os.path.join(weights_dir, 'hidrecbiases.npy'))
visbiases = np.load(os.path.join(weights_dir, 'visbiases.npy'))
hidpen = np.load(os.path.join(weights_dir, 'hidpen.npy'))
penrecbiases = np.load(os.path.join(weights_dir, 'penrecbiases.npy'))
# ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest') # ax.set_title('Training: %i' % label) # flatten the images n_samples = len(digits.images) data = digits.images.reshape((n_samples, -1)) X_train2, X_test2, y_train2, y_test2 = train_test_split(data, digits.target, test_size=0.5, shuffle=False) # print(X_train2[1]) # print(y_train2[1]) # print(X_test2[1]) # print(y_test2[1]) X_train, y_train, X_test, y_test = mnist.load() X_train = (X_train[1:limit, :] / 16.0).astype(numpy.uint8).astype( numpy.float64) X_test = (X_test[1:limit, :] / 16.0).astype(numpy.uint8).astype(numpy.float64) y_train = y_train[1:limit] y_test = y_test[1:limit] # print(X_train[2]) # print(y_train[2]) # print(X_test[1]) # print(y_test) # Create a classifier: a support vector classifier clf = svm.SVC(gamma='scale') # Learn the digits on the train subset
def __init__(self):
    """Load MNIST and remember the original training split.

    ``X_val`` / ``Y_val`` start out as ``None``. The ``*_vanilla``
    attributes reference the same objects as ``X_train`` / ``Y_train``
    (they are not copies).
    """
    x_train, y_train, x_test, y_test = mnist.load()

    self.X_train, self.Y_train = x_train, y_train
    self.X_test, self.Y_test = x_test, y_test

    # No validation split exists yet.
    self.X_val = self.Y_val = None

    self.X_train_vanilla, self.Y_train_vanilla = self.X_train, self.Y_train
def vectorize(i: int) -> np.ndarray:
    """Return a (10, 1) column vector of zeros with a 1 at index ``i``."""
    vec = np.zeros((10, 1))
    vec[i] = 1
    return vec


# Training hyper-parameters.
epochs = 1
mini_batch_size = 20
eta = 0.4

# Layer sizes: input, hidden layers, output.
input_layer = 784
output_layer = 10
hidden_layers = [100]
NN_struct = [input_layer, *hidden_layers, output_layer]

train_images, train_labels, test_images, test_labels = load()

# One-hot encode the labels as (10, 1) column vectors.
train_labels = np.array([vectorize(i) for i in train_labels])
test_labels = np.array([vectorize(i) for i in test_labels])

# NOTE(review): each (image, label) pair combines arrays of different
# shapes. Recent NumPy versions refuse to build such ragged arrays without
# dtype=object -- confirm what Network.train_SGD expects before changing it.
train_data = np.array(list(zip(train_images, train_labels)))
test_data = np.array(list(zip(test_images, test_labels)))

# Build the network from NN_struct so the trained model matches the
# `hidden_layers` value recorded in the saved file name. It was previously
# hard-coded to [784, 30, 10] and NN_struct was never used.
network = Network(NN_struct)
network.train_SGD(train_data, epochs, mini_batch_size, eta, test_data=test_data)

print(f"Saving object via pickle with parameters:\nepochs - {epochs}\n"
      f"minibatch size - {mini_batch_size}\neta - {eta}")
network.save(f"SNN_{epochs}_{mini_batch_size}_{eta}_HL_{hidden_layers}.pkl")
print("Done.")
import torch.nn.functional as F
import torch.nn as nn
import torch
from torch import optim
import mnist
import numpy as np

trainingData, trainingLabels, testData, testLabels = mnist.load()

# Binarise the images: scale by 1/255, threshold at 0.5, store as floats.
trainingData = (trainingData / 255 > 0.5).astype(float)
testData = (testData / 255 > 0.5).astype(float)

# Dataset sizes and training length.
trainingSize = 60000
actualTrainingSize = 50000
validSize = 10000
testSize = 10000
numEpochs = 10


class regressionClassifier(nn.Module):
    """A single linear layer mapping 784 inputs to 10 outputs."""

    def __init__(self):
        super().__init__()
        self.output = nn.Linear(784, 10)

    def forward(self, x):
        """Apply the linear layer, print the output size, return the output.

        No softmax / log-softmax is applied here.
        """
        logits = self.output(x)
        print(str(logits.size()))
        return logits
# architecture two shapes = [(28, 28), 1000, 500, 200] rf_shapes = [(9, 9), None, None] rates = [1., 1., 1.] n_layers = len(shapes) - 1 assert len(rf_shapes) == n_layers assert len(rates) == n_layers # --- define our rate neuron model neuron = ('softlif', dict( sigma=0.01, tau_rc=0.02, tau_ref=0.002, gain=1, bias=1, amp=1. / 63.04)) neuron_fn = neurons.get_theano_fn(*neuron) # --- load the data train, valid, test = mnist.load( normalize=True, shuffle=True, spaun=args.spaun) train_images, test_images = train[0], test[0] # --- pretrain with SGD backprop n_epochs = 15 batch_size = 100 deep = DeepAutoencoder() data = train_images for i in range(n_layers): vis_func = None if i == 0 else neuron_fn # create autoencoder for the next layer auto = Autoencoder( shapes[i], shapes[i+1], rf_shape=rf_shapes[i], vis_func=vis_func, hid_func=neuron_fn)
# coding: utf-8 import numpy as np import pickle import mnist import func import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt from two_layer_net import TwoLayerNet (x_train, t_train), (x_test, t_test) = mnist.load(normalize=True, one_hot_label=True) iters_num = 10000 train_size = x_train.shape[0] batch_size = 100 learning_rate = 0.1 train_loss_list = [] train_acc_list = [] test_acc_list = [] iter_per_epoch = max(train_size / batch_size, 1) net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10) for i in range(iters_num): batch_mask = np.random.choice(train_size, batch_size) x_batch = x_train[batch_mask]
args = parser.parse_args()

# Fail early when the parameter file does not exist.
if not os.path.exists(args.loadfile):
    raise IOError("Cannot find '%s'" % args.loadfile)

data = np.load(args.loadfile)

# A file containing all four of these keys is treated as a static network.
if all(a in data for a in ['weights', 'biases', 'Wc', 'bc']):
    # Static network params file
    if 'neuron' in data:
        # Only the second element of the stored pair is used.
        _, neuron_params = data['neuron']
    else:
        # Fallback neuron parameters when the file stores none.
        neuron_params = dict(sigma=0.01, tau_rc=0.02, tau_ref=0.002, gain=1, bias=1, amp=1. / 63.04)

    # --- load the testing data
    _, _, [images, labels] = mnist.load(
        normalize=True, shuffle=True, spaun=args.spaun)
    # The number of distinct labels must equal the size of data['bc'].
    assert np.unique(labels).size == data['bc'].size

    # --- compute the error
    neuron = ('softlif', dict(neuron_params))
    errors = compute_static_error(data, images, labels, neuron)
    print("----- Static network with softlif -----")
    print("Static error: %0.2f%%" % (100 * errors.mean()))

    # Repeat with the 'lif' neuron on a copy of the parameters with
    # 'sigma' removed.
    neuron = ('lif', dict(neuron_params))
    neuron[1].pop('sigma')
    errors = compute_static_error(data, images, labels, neuron)
    print("----- Static network with lif -----")
    print("Static error: %0.2f%%" % (100 * errors.mean()))
    view_static(data, images, labels, neuron)
random_state=None, verbose=True, stopping_criterion=None, # 'edv', 'tie' edv_threshold=0.25, tie_threshold=0.25, #======================================================================= # sparse=True, #======================================================================= sparse=False, minimum_sparseness=0.25, maximum_sparseness=0.75, early_stopping=False, validation_fraction=0.1, tol=0.0001, n_iter_no_change=10, metric='rmse', prob_skip_connection=0.0) # 0.35 return estimator if __name__ == '__main__': scale = True X_train, y_train, X_test, y_test = load(scale) for run in range(1, 1 + 1): print('MNIST: SLM run', run) estimator = get_estimator() fit_and_predict(estimator, X_train, y_train, X_test, y_test)
# NOTE: this script is Python 2 (print statements, xrange).
parser.add_argument('--max_epochs', type=int, default=1000)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('output_dir', type=str)
# Hidden layer sizes separated by "x" (parsed below with split("x")).
parser.add_argument('network_structure', type=str)
args = parser.parse_args()

# os.makedirs raises an error if output_dir already exists.
os.makedirs(args.output_dir)

# Set up the network:
print "Setting up network..."
# Layer sizes: 28*28 inputs, the hidden sizes from the command line, 10 outputs.
dimensions = [28*28] + [int(x) for x in args.network_structure.split("x")] + [10]
net = models.BatchTrainedModel(models.perceptron_model(dimensions))

print "Loading MNIST..."
(train_x, train_y), (test_x, test_y) = mnist.load()
print "Done Loading MNIST."
print "%d training examples" % train_x.shape[0]

print "Training..."
# Tab-separated log with one row per epoch: epoch, train error, test error.
graph_f = open("%s/graph.tsv" % args.output_dir, "w")
for i in xrange(args.max_epochs+1):
    print "Ran for", i, "epochs"
    # Write the current model to a per-epoch file, then evaluate it.
    net.write("%s/epoch_%04d.hdf5" % (args.output_dir, i))
    train_error = net.error_rate(train_x, train_y)
    test_error = net.error_rate(test_x, test_y)
    graph_f.write("%d\t%f\t%f\n" % (i, train_error, test_error))
    # Flush after every row so the log is current while the script runs.
    graph_f.flush()
    print "Train Error rate =", train_error
    print "Test Error rate =", test_error
import mnist
import numpy as np
import NeuralNetwork as nn
import random
import pygame
import math

# Get data from MNIST
# NOTE: The data-retrieval code is not original to this project; it was
# taken from: https://github.com/hsjeong5/MNIST-for-Numpy
imgTrain, lblTrain, imgTest, lblTest = mnist.load()

# Layer buffers, each a zero-filled column vector:
#   input / output layers: 784 x 1
#   hidden layers:          50 x 1
#   middle layer:            2 x 1
il, ol = (np.zeros((784, 1)) for _ in range(2))
hl1, hl2, hl3, hl4 = (np.zeros((50, 1)) for _ in range(4))
middleLayer = np.zeros((2, 1))

# Only hl1 and hl4 are placed in the layer list; hl2 and hl3 stay unused.
layers = [il, hl1, middleLayer, hl4, ol]

# Initializes the network, loading the weights and biases from the files
# network = nn.NeuralNetwork(layers, learningRate=0.0003, weightImportFile="EncoderWeights.txt", biasImportFile="EncoderBiases.txt")
def load_data():
    """Load MNIST into the module-level ``training_data`` / ``test_data``.

    ``mnist.load()`` must return exactly two items.
    """
    global training_data, test_data
    loaded = mnist.load()
    training_data, test_data = loaded
def train(input_dim=INPUT_DIM,
          batch_size=BATCH_SIZE,
          n_features_first=N_FEATURES_FIRST,
          critic_iters=CRITIC_ITERS,
          lambda_reg=LAMBDA,
          learning_rate=1e-4,
          iterations=ITERS,
          fixed_noise_size=FIXED_NOISE_SIZE,
          n_features_reduction_factor=2,
          gen_fix_layer_1=False,
          gen_fix_layer_2=False,
          gen_fix_layer_3=False,
          gen_fix_layer_4=False,
          disc_fix_layer_1=False,
          disc_fix_layer_2=False,
          disc_fix_layer_3=False,
          disc_fix_layer_4=False,
          architecture='DCGAN',
          init_method='He',
          BN_layers_trainable=True,
          load_saved=True):
    """
    Train a GAN on MNIST with the given configuration and return the run directory.

    - all outputs of a run go to a directory whose name is built from the
      parameter values, so each configuration gets its own folder
    - every 100 iterations the function generates sample images from a fixed
      noise batch, computes the losses on the dev set, appends them to
      'training_progress.csv' and saves the model together with the number of
      iterations trained so far
    - when load_saved=True and the run directory already contains
      'training_progress.csv', training resumes from the saved iteration count;
      otherwise load_saved is forced to False and training starts from scratch
    - when settings.send_email is set, an email is sent after training (unless
      a resumed run had already reached `iterations`)

    :param input_dim: size of the noise vector z fed to the generator
    :param batch_size: number of noise vectors drawn per training step; also
            passed (twice) to mnist.load
    :param n_features_first: forwarded to generator and discriminator
    :param critic_iters: number of discriminator updates per generator update
    :param lambda_reg: weight of the gradient regularizer in the WGAN-GP
            discriminator loss; overwritten with None for 'DCGAN'
    :param learning_rate: Adam learning rate (the DCGAN branch uses
            2 * learning_rate)
    :param iterations: total number of training iterations, including the
            ones already done in a resumed run
    :param fixed_noise_size: number of fixed noise vectors used for the
            sample images
    :param n_features_reduction_factor: integer, e.g.: 1: use same number of
            feature-maps everywhere, 2: half the number of feature-maps in
            every step
    :param gen_fix_layer_1: forwarded to generator as fix_layer_1 (likewise
            gen_fix_layer_2 .. gen_fix_layer_4)
    :param disc_fix_layer_1: forwarded to discriminator as fix_layer_1
            (likewise disc_fix_layer_2 .. disc_fix_layer_4)
    :param architecture: 'WGANGP' or 'DCGAN'; any other value is replaced by
            'DCGAN'
    :param init_method: the method with which the variables are initialized,
            supported: 'uniform', 'normal', 'truncated_normal', 'normal1',
            'truncated_normal1', 'normal_BN', 'uniform_BN', 'normal_BN_shift',
            'He', 'LayerDistribution'; any other value is replaced by
            'uniform'
    :param BN_layers_trainable: if False, the BatchNorm beta variables of the
            layers flagged as fixed (layers 1-3) are removed from the variable
            lists given to the optimizers (DCGAN branch only)
    :param load_saved: try to restore a previously saved model and training
            progress from the run directory
    :return: the run directory (string, with trailing '/')
    """
    # -------------------------------------------------------
    # setting for sending emails and getting statistics
    send = settings.send_email
    get_stats = settings.get_statistics

    # -------------------------------------------------------
    # architecture default
    if architecture not in ['WGANGP']:
        architecture = 'DCGAN'
    if architecture == 'DCGAN':
        # the DCGAN loss has no regularizer; None also shows up in the folder name
        lambda_reg = None

    # -------------------------------------------------------
    # init_method default
    # ('uniform' is not in the list, but falls through to 'uniform' anyway)
    if init_method not in [
            'normal', 'truncated_normal', 'normal1', 'truncated_normal1',
            'normal_BN', 'uniform_BN', 'normal_BN_shift', 'He',
            'LayerDistribution'
    ]:
        init_method = 'uniform'

    # -------------------------------------------------------
    # create unique folder name
    # NOTE(review): BN is not defined inside this function; presumably a
    # module-level constant - confirm.
    dir1 = 'partly_fixed2/'
    directory = dir1+str(input_dim)+'_'+str(batch_size)+'_'+str(n_features_first)+'_'+str(critic_iters)+'_'+\
                str(lambda_reg)+'_'+str(learning_rate)+'_'+str(n_features_reduction_factor)+'_'+\
                str(gen_fix_layer_1)+'_'+str(gen_fix_layer_2)+'_'+str(gen_fix_layer_3)+'_'+str(gen_fix_layer_4)+'_' + \
                str(disc_fix_layer_1) + '_' + str(disc_fix_layer_2) + '_' + str(disc_fix_layer_3) + '_' + \
                str(disc_fix_layer_4) + '_' + \
                str(architecture)+'_'+str(init_method)+'_'+str(BN_layers_trainable)+'_'+str(BN)+'/'
    samples_dir = directory + 'samples/'
    model_dir = directory + 'model/'

    # create directories if they don't exist
    if not os.path.isdir(dir1):
        call(['mkdir', dir1])

    if not os.path.isdir(directory):
        # a brand-new run directory cannot contain a saved model
        load_saved = False
        print 'make new directory:', directory
        print
        call(['mkdir', directory])
        call(['mkdir', samples_dir])
        call(['mkdir', model_dir])

    # if directories already exist, but model wasn't saved so far, set load_saved to False
    if 'training_progress.csv' not in os.listdir(directory):
        load_saved = False

    # -------------------------------------------------------
    # initialize a TF session
    # the thread counts come from N_CPUS_TF when it is set, else from settings
    config = tf.ConfigProto()
    if N_CPUS_TF is None:
        number_cpus_tf = settings.number_cpus
    else:
        number_cpus_tf = N_CPUS_TF
    config.intra_op_parallelism_threads = number_cpus_tf
    config.inter_op_parallelism_threads = number_cpus_tf
    session = tf.Session(config=config)

    # -------------------------------------------------------
    # convenience function to build the model
    # The *_b defaults are the fix-layer flags of train(); passing False for
    # all of them builds the graph with no layer flagged as fixed.
    # build_model reads `session` from the enclosing scope at call time, so it
    # uses whichever session is currently bound there.
    def build_model(gen_fix_layer_1_b=gen_fix_layer_1,
                    gen_fix_layer_2_b=gen_fix_layer_2,
                    gen_fix_layer_3_b=gen_fix_layer_3,
                    gen_fix_layer_4_b=gen_fix_layer_4,
                    disc_fix_layer_1_b=disc_fix_layer_1,
                    disc_fix_layer_2_b=disc_fix_layer_2,
                    disc_fix_layer_3_b=disc_fix_layer_3,
                    disc_fix_layer_4_b=disc_fix_layer_4):
        with tf.name_scope('placeholders'):
            x_true = tf.placeholder(tf.float32, [None, 28, 28, 1])
            z = tf.placeholder(tf.float32, [None, input_dim])

        x_generated = generator(
            z,
            n_features_first=n_features_first,
            n_features_reduction_factor=n_features_reduction_factor,
            fix_layer_1=gen_fix_layer_1_b,
            fix_layer_2=gen_fix_layer_2_b,
            fix_layer_3=gen_fix_layer_3_b,
            fix_layer_4=gen_fix_layer_4_b,
            architecture=architecture,
            init_method=init_method)

        # the discriminator is applied to real and generated images;
        # the second call is made with reuse=True
        d_true = discriminator(
            x_true,
            reuse=False,
            n_features_first=n_features_first,
            n_features_reduction_factor=n_features_reduction_factor,
            fix_layer_1=disc_fix_layer_1_b,
            fix_layer_2=disc_fix_layer_2_b,
            fix_layer_3=disc_fix_layer_3_b,
            fix_layer_4=disc_fix_layer_4_b,
            architecture=architecture,
            init_method=init_method)

        d_generated = discriminator(
            x_generated,
            reuse=True,
            n_features_first=n_features_first,
            n_features_reduction_factor=n_features_reduction_factor,
            fix_layer_1=disc_fix_layer_1_b,
            fix_layer_2=disc_fix_layer_2_b,
            fix_layer_3=disc_fix_layer_3_b,
            fix_layer_4=disc_fix_layer_4_b,
            architecture=architecture,
            init_method=init_method)

        if architecture == 'DCGAN':
            with tf.name_scope('loss'):
                # generator: generated images should be labelled 1
                g_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=d_generated, labels=tf.ones_like(d_generated)))
                # discriminator: generated -> 0, true -> 1, averaged over both terms
                d_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_generated, labels=tf.zeros_like(d_generated))) +\
                         tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_true, labels=tf.ones_like(d_true)))
                d_loss = d_loss / 2.

            with tf.name_scope('g_optimizer'):
                g_optimizer = tf.train.AdamOptimizer(learning_rate=2 * learning_rate, beta1=0.5)
                g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')

                # make BN layers trainable or not, depending on BN_layers_trainable
                # not_to_include collects the BatchNorm beta names of every
                # layer (generator and discriminator, layers 1-3) flagged as
                # fixed; it is reused for the discriminator below
                g_vars2 = []
                not_to_include = []
                if gen_fix_layer_1_b:
                    not_to_include += [
                        'generator/fully_connected/BatchNorm/beta:0'
                    ]
                if gen_fix_layer_2_b:
                    not_to_include += [
                        'generator/Conv2d_transpose/BatchNorm/beta:0'
                    ]
                if gen_fix_layer_3_b:
                    not_to_include += [
                        'generator/Conv2d_transpose_1/BatchNorm/beta:0'
                    ]
                if disc_fix_layer_1_b:
                    not_to_include += ['discriminator/Conv/BatchNorm/beta:0']
                if disc_fix_layer_2_b:
                    not_to_include += ['discriminator/Conv_1/BatchNorm/beta:0']
                if disc_fix_layer_3_b:
                    not_to_include += ['discriminator/Conv_2/BatchNorm/beta:0']
                for v in g_vars:
                    if v.name not in not_to_include:
                        g_vars2 += [v]
                # the filtered list is only used when BN layers shall not be trainable
                if not BN_layers_trainable:
                    g_vars = g_vars2

                g_train = g_optimizer.minimize(g_loss, var_list=g_vars)

            with tf.name_scope('d_optimizer'):
                d_optimizer = tf.train.AdamOptimizer(learning_rate=2 * learning_rate, beta1=0.5)
                d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')

                # make BN layers trainable or not, depending on BN_layers_trainable
                d_vars2 = []
                for v in d_vars:
                    if v.name not in not_to_include:
                        d_vars2 += [v]
                if not BN_layers_trainable:
                    d_vars = d_vars2

                d_train = d_optimizer.minimize(d_loss, var_list=d_vars)

        else:  # WGAN-GP
            with tf.name_scope('regularizer'):
                # random interpolation between true and generated images
                # NOTE(review): epsilon has a fixed first dimension of
                # batch_size, so the batches fed for x_true and z presumably
                # must contain exactly batch_size rows - confirm.
                epsilon = tf.random_uniform([batch_size, 1, 1, 1], 0.0, 1.0)
                x_hat = epsilon * x_true + (1 - epsilon) * x_generated
                d_hat = discriminator(
                    x_hat,
                    reuse=True,
                    n_features_first=n_features_first,
                    n_features_reduction_factor=n_features_reduction_factor,
                    fix_layer_1=disc_fix_layer_1_b,
                    fix_layer_2=disc_fix_layer_2_b,
                    fix_layer_3=disc_fix_layer_3_b,
                    fix_layer_4=disc_fix_layer_4_b,
                    architecture=architecture,
                    init_method=init_method)

                # norm of the gradient of d_hat w.r.t. x_hat (summed over
                # axes 1 and 2), penalized for deviating from 1
                gradients = tf.gradients(d_hat, x_hat)[0]
                ddx = tf.sqrt(tf.reduce_sum(gradients**2, axis=[1, 2]))
                d_regularizer = tf.reduce_mean((ddx - 1.0)**2)

            with tf.name_scope('loss'):
                g_loss = -tf.reduce_mean(d_generated)
                wasserstein_dist = tf.reduce_mean(d_true) - tf.reduce_mean(
                    d_generated)
                d_loss = -wasserstein_dist + lambda_reg * d_regularizer

            # NOTE: unlike the DCGAN branch, BN_layers_trainable is not
            # evaluated here; all trainable variables of the scope are used
            with tf.name_scope('g_optimizer'):
                g_optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate, beta1=0, beta2=0.9)
                g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
                g_train = g_optimizer.minimize(g_loss, var_list=g_vars)

            with tf.name_scope('d_optimizer'):
                d_optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate, beta1=0, beta2=0.9)
                d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
                d_train = d_optimizer.minimize(d_loss, var_list=d_vars)

        # initialize variables using uniform xavier init method, see tensorflow documentation
        session.run(tf.global_variables_initializer())

        # the WGANGP tuple has one extra element (wasserstein_dist)
        if architecture == 'DCGAN':
            return x_true, z, x_generated, g_loss, d_loss, g_train, d_train, g_vars, d_vars
        else:  # WGANGP
            return x_true, z, x_generated, g_loss, wasserstein_dist, d_loss, g_train, d_train, g_vars, d_vars

    # -------------------------------------------------------
    # build the model
    # 'uniform', 'He' and 'normal' need no re-initialization step here, and
    # neither does a run that is restored from disk
    if (init_method in ['uniform', 'He', 'normal']) or load_saved:
        if architecture == 'DCGAN':
            x_true, z, x_generated, g_loss, d_loss, g_train, d_train, g_vars, d_vars = build_model(
            )
        else:  # WGANGP
            x_true, z, x_generated, g_loss, wasserstein_dist, d_loss, g_train, d_train, g_vars, d_vars = build_model(
            )

    else:  # not load_saved and not 'uniform'
        # build model with all variables trainable to be able to change weights
        if architecture == 'DCGAN':
            x_true, z, x_generated, g_loss, d_loss, g_train, d_train, \
                g_vars, d_vars = build_model(False, False, False, False,False, False, False, False)
        else:  # WGANGP
            x_true, z, x_generated, g_loss, wasserstein_dist, d_loss, g_train, \
                d_train, g_vars, d_vars = build_model(False, False, False, False, False, False, False, False)

        # change the weights as wanted
        saver = tf.train.Saver(max_to_keep=1)
        trainable_vars = tf.trainable_variables()
        if get_stats:
            # only needed for the weight histograms below
            import matplotlib.pyplot as plt
        for v in trainable_vars:
            print 'change weights of: ' + str(v.name)
            # current values of v; their largest absolute value is used as
            # the scale for several of the methods below
            weights = session.run(v)
            # if 'BatchNorm' in v.name:  #delete
            #     print 'BN weights:'  #delete
            #     print weights  #delete
            #     print  #delete
            # NOTE(review): in the *_BN methods last_val is read for BatchNorm
            # variables before it is ever assigned if the first trainable
            # variable is a BatchNorm variable - confirm the variable order.
            if init_method == 'truncated_normal':
                # using xavier init method, see tensorflow documentation
                max_abs_val = np.max(np.abs(weights))
                session.run(
                    tf.assign(v, value=tf.truncated_normal(v.shape, mean=0.0, stddev=max_abs_val / np.sqrt(3))))
            elif init_method == 'normal1':
                session.run(
                    tf.assign(v, value=tf.random_normal(v.shape, mean=0.0, stddev=1.0)))
            elif init_method == 'truncated_normal1':
                session.run(
                    tf.assign(v, value=tf.truncated_normal(v.shape, mean=0.0, stddev=1.0)))
            elif init_method == 'uniform_BN':
                # only BatchNorm variables are re-drawn, using the scale of
                # the previously visited variable
                max_abs_val = np.max(np.abs(weights))
                if 'BatchNorm' in v.name:
                    session.run(
                        tf.assign(v, value=tf.random_uniform(v.shape, minval=-last_val, maxval=last_val)))
                last_val = max_abs_val
            elif init_method == 'normal_BN':
                # BatchNorm variables use the scale of the previously visited
                # variable, all others their own scale
                max_abs_val = np.max(np.abs(weights))
                if 'BatchNorm' in v.name:
                    session.run(
                        tf.assign(v, value=tf.random_normal(v.shape, mean=0.0, stddev=last_val / np.sqrt(3))))
                else:
                    session.run(
                        tf.assign(v, value=tf.random_normal(v.shape, mean=0.0, stddev=max_abs_val / np.sqrt(3))))
                last_val = max_abs_val
            elif init_method == 'normal_BN_shift':
                # like 'normal_BN', but BatchNorm variables are drawn with
                # mean -last_val and stddev last_val
                max_abs_val = np.max(np.abs(weights))
                if 'BatchNorm' in v.name:
                    session.run(
                        tf.assign(v, value=tf.random_normal(v.shape, mean=-last_val, stddev=last_val)))
                else:
                    session.run(
                        tf.assign(v, value=tf.random_normal(v.shape, mean=0.0, stddev=max_abs_val / np.sqrt(3))))
                last_val = max_abs_val
            elif init_method == 'LayerDistribution':
                # hard-coded mean/stddev per weight tensor, selected by
                # variable name; variables with any other name stay unchanged
                if v.name == 'generator/fully_connected/weights:0':
                    session.run(
                        tf.assign(v, value=tf.random_normal(v.shape, mean=0.0, stddev=0.037907723)))
                elif v.name == 'generator/Conv2d_transpose/weights:0':
                    session.run(
                        tf.assign(v, value=tf.random_normal(v.shape, mean=-0.007851141, stddev=0.034838371)))
                elif v.name == 'generator/Conv2d_transpose_1/weights:0':
                    session.run(
                        tf.assign(v, value=tf.random_normal(v.shape, mean=-0.001966879, stddev=0.037020162)))
                elif v.name == 'generator/Conv2d_transpose_2/weights:0':
                    session.run(
                        tf.assign(v, value=tf.random_normal(v.shape, mean=-0.121885814, stddev=0.294095486)))
                elif v.name == 'discriminator/Conv/weights:0':
                    session.run(
                        tf.assign(v, value=tf.random_normal(v.shape, mean=-0.005809855, stddev=0.044240803)))
                elif v.name == 'discriminator/Conv_1/weights:0':
                    session.run(
                        tf.assign(v, value=tf.random_normal(v.shape, mean=-0.000329115, stddev=0.03293338)))
                elif v.name == 'discriminator/Conv_2/weights:0':
                    session.run(
                        tf.assign(v, value=tf.random_normal(v.shape, mean=-0.000697783, stddev=0.028810507)))
                elif v.name == 'discriminator/fully_connected/weights:0':
                    session.run(
                        tf.assign(v, value=tf.random_normal(v.shape, mean=0.000849896, stddev=0.074863143)))

            if get_stats:
                # save a histogram of the (possibly re-drawn) values of v
                weights_new = session.run(v)
                f = plt.figure()
                plt.hist(np.reshape(weights_new, newshape=(-1, )), bins=100, density=True)
                f.savefig(fname=directory + v.name.replace('/', '_').replace(':', '') + '.png')
                plt.close(f)

        # persist the re-initialized weights so they survive the graph reset below
        saver.save(sess=session, save_path=model_dir + 'saved_model')
        print
        print 'weights were initialized with: ' + init_method
        print

        # load new session, so that no conflict with names in the name_scopes
        session.close()
        tf.reset_default_graph()
        session = tf.Session(config=config)

        # load the model with the perturbed weights, but now s.t. the correct variables are trainable
        if architecture == 'DCGAN':
            x_true, z, x_generated, g_loss, d_loss, g_train, d_train, g_vars, d_vars = build_model(
            )
        else:  # WGANGP
            x_true, z, x_generated, g_loss, wasserstein_dist, d_loss, g_train, d_train, g_vars, d_vars = build_model(
            )

        # restore the model with the correctly initialized weights
        saver = tf.train.Saver(max_to_keep=1)
        saver.restore(sess=session, save_path=model_dir + 'saved_model')
        print 'loaded model with weights initialized with: ' + init_method
        print

    # -------------------------------------------------------
    # FK: For saving samples, taken from IWGAN
    # the same noise batch is used for every sample image of this call
    fixed_noise = np.random.normal(size=(fixed_noise_size, input_dim)).astype('float32')

    def generate_image(frame):
        # run the generator on the fixed noise and write the result to
        # samples_dir as 'iteration_<frame>.png'
        samples = session.run(x_generated, feed_dict={
            z: fixed_noise
        }).squeeze()
        # print samples.shape
        save_images.save_images(samples.reshape((fixed_noise_size, 28, 28)), samples_dir + 'iteration_{}.png'.format(frame))

    # -------------------------------------------------------
    # FK: for saving the model create a saver
    saver = tf.train.Saver(max_to_keep=1)
    iterations_trained = 0
    # empty progress table; the WGAN-GP variant has an extra column
    if architecture == 'DCGAN':
        training_progress = pd.DataFrame(
            data=None, index=None, columns=['iteration', 'time', 'd_loss'])
    else:  # WGAN-GP
        training_progress = pd.DataFrame(
            data=None,
            index=None,
            columns=['iteration', 'time', 'Wasserstein_dist', 'd_loss'])

    # restore the model:
    if load_saved:
        saver.restore(sess=session, save_path=model_dir + 'saved_model')
        iterations_trained = int(np.loadtxt(fname=model_dir + 'iterations.csv'))
        tp_app = pd.read_csv(filepath_or_buffer=directory + 'training_progress.csv', index_col=0, header=0)
        training_progress = pd.concat([training_progress, tp_app], axis=0, ignore_index=True)
        print 'loaded training progress, and the model, which was already trained for {} iterations'.format(
            iterations_trained)
        print training_progress
        print

        # if the network is already trained completely, set send to false
        if iterations_trained == iterations:
            send = False

    # -------------------------------------------------------
    # FK: print and get model summary
    # only the first element of each model_summary result is kept; it is
    # stored below as the number of trainable parameters
    n_params_gen = model_summary(var_list=g_vars)[0]
    print
    n_params_disc = model_summary(var_list=d_vars)[0]
    print

    # -------------------------------------------------------
    # FK: print model config to file
    # first inner list: names, second inner list: the corresponding values
    model_config = [[
        'input_dim', 'batch_size', 'n_features_first', 'critic_iters',
        'lambda_reg', 'learning_rate', 'fixed_noise_size',
        'n_features_reduction_factor', 'gen_fix_layer_1', 'gen_fix_layer_2',
        'gen_fix_layer_3', 'gen_fix_layer_4', 'disc_fix_layer_1',
        'disc_fix_layer_2', 'disc_fix_layer_3', 'disc_fix_layer_4',
        'architecture', 'init_method', 'BN_layers_trainable',
        'n_trainable_params_gen', 'n_trainable_params_disc'
    ], [
        input_dim, batch_size, n_features_first, critic_iters, lambda_reg,
        learning_rate, fixed_noise_size, n_features_reduction_factor,
        gen_fix_layer_1, gen_fix_layer_2, gen_fix_layer_3, gen_fix_layer_4,
        disc_fix_layer_1, disc_fix_layer_2, disc_fix_layer_3,
        disc_fix_layer_4, architecture, init_method, BN_layers_trainable,
        n_params_gen, n_params_disc
    ]]
    # transpose so that every row is one (name, value) pair
    model_config = np.transpose(model_config)
    model_config = pd.DataFrame(data=model_config)
    model_config.to_csv(path_or_buf=directory + 'model_config.csv')
    print 'saved model configuration'
    print

    # -------------------------------------------------------
    # FK: get the MNIST data loader
    # train_gen and dev_gen are called below to obtain batch iterators;
    # test_gen is not used in this function
    train_gen, dev_gen, test_gen = mnist.load(batch_size, batch_size)

    # create an infinite generator
    # (restarts train_gen() whenever it is exhausted and drops the targets)
    def inf_train_gen():
        while True:
            for images, targets in train_gen():
                yield images

    gen = inf_train_gen()

    # -------------------------------------------------------
    # training loop
    print model_config
    print
    t = time.time()  # get start time

    # for average times:
    # NOTE(review): t4s is allocated and averaged at the end, but never
    # written in this function, so the reported t4 is always 0.
    if get_stats:
        t1s = np.zeros((iterations - iterations_trained))
        t2s = np.zeros((iterations - iterations_trained))
        t3s = np.zeros((iterations - iterations_trained))
        t4s = np.zeros((iterations - iterations_trained))

    # i counts the iterations of this call; the absolute iteration number is
    # i + iterations_trained + 1
    for i in xrange(iterations - iterations_trained):
        # one generator update on a fresh noise batch
        z_train = np.random.randn(batch_size, input_dim)
        if get_stats:
            tt1 = time.time()
        session.run(g_train, feed_dict={z: z_train})
        if get_stats:
            tt1 = time.time() - tt1

        # loop for critic training
        for j in xrange(critic_iters):
            # FK: insert the following 3 lines s.t. not the same batch is used for all 5 discriminator updates
            if get_stats:
                tt = time.time()
            batch = gen.next()
            images = batch.reshape([-1, 28, 28, 1])
            z_train = np.random.randn(batch_size, input_dim)
            if get_stats:
                print '\ncomputation time to get true batch and random vector: {}'.format(
                    time.time() - tt)
                tt = time.time()
            session.run(d_train, feed_dict={x_true: images, z: z_train})

        # timing statistics; tt stems from the last critic iteration, so t1 is
        # the time of the generator step plus the last discriminator step
        if get_stats:
            t1 = time.time() - tt + tt1
            t1s[i] = t1
            print 'computation time to train for 1 iteration (minimize disc and gen one step): t1 = {}'.format(
                t1)
            tt = time.time()
            session.run(d_loss, feed_dict={x_true: images, z: z_train})
            session.run(g_loss, feed_dict={z: z_train})
            t2 = time.time() - tt
            t2s[i] = t2
            print 'computation time to compute the disc. and gen. loss once: t2 = {}'.format(
                t2)
            tt = time.time()
            session.run(x_generated, feed_dict={z: z_train})
            t3 = time.time() - tt
            t3s[i] = t3
            print 'computation time to compute x_generated: t3 = {}'.format(
                t3)
            print 't1/t2 = {}'.format(t1 / t2)
            # list_ = session.run(g_optimizer.compute_gradients(g_loss, var_list=g_vars), feed_dict={z: z_train})
            # print 'number of gradients computed: {}'.format(2*len(list_))

        # print the current iteration
        print('iteration={}/{}'.format(i + iterations_trained + 1, iterations))

        # every 100 steps compute the losses and elapsed times, and generate images
        if (i + iterations_trained) % 100 == 99:
            # get time for last 100 iterations
            elapsed_time = time.time() - t

            # generate sample images from fixed noise
            generate_image(i + iterations_trained + 1)
            print 'generated images'

            # compute and save losses on dev set
            # (mean over all dev batches, each with a fresh noise batch)
            if architecture == 'DCGAN':
                dev_d_loss = []
                for images_dev, _ in dev_gen():
                    images_dev = images_dev.reshape([-1, 28, 28, 1])
                    z_train_dev = np.random.randn(batch_size, input_dim)
                    _dev_d_loss = session.run(d_loss, feed_dict={
                        x_true: images_dev,
                        z: z_train_dev
                    })
                    dev_d_loss.append(_dev_d_loss)
                tp_app = pd.DataFrame(data=[[
                    i + iterations_trained + 1, elapsed_time,
                    np.mean(dev_d_loss)
                ]], index=None, columns=['iteration', 'time', 'd_loss'])
                training_progress = pd.concat([training_progress, tp_app], axis=0, ignore_index=True)
                training_progress.to_csv(path_or_buf=directory + 'training_progress.csv')
            else:  # WGAN-GP
                dev_W_dist = []
                dev_d_loss = []
                for images_dev, _ in dev_gen():
                    images_dev = images_dev.reshape([-1, 28, 28, 1])
                    z_train_dev = np.random.randn(batch_size, input_dim)
                    _dev_W_dist = session.run(wasserstein_dist, feed_dict={
                        x_true: images_dev,
                        z: z_train_dev
                    })
                    _dev_d_loss = session.run(d_loss, feed_dict={
                        x_true: images_dev,
                        z: z_train_dev
                    })
                    dev_W_dist.append(_dev_W_dist)
                    dev_d_loss.append(_dev_d_loss)
                tp_app = pd.DataFrame(data=[[
                    i + iterations_trained + 1, elapsed_time,
                    np.mean(dev_W_dist),
                    np.mean(dev_d_loss)
                ]], index=None, columns=[
                    'iteration', 'time', 'Wasserstein_dist', 'd_loss'
                ])
                training_progress = pd.concat([training_progress, tp_app], axis=0, ignore_index=True)
                training_progress.to_csv(path_or_buf=directory + 'training_progress.csv')
            print 'saved training progress'
            print

            # save model
            saver.save(sess=session, save_path=model_dir + 'saved_model')
            # save number of iterations trained
            # (read back by the load_saved branch above when a run is resumed)
            np.savetxt(fname=model_dir + 'iterations.csv', X=[i + iterations_trained + 1])
            print 'saved model after training iteration {}'.format(
                i + iterations_trained + 1)

            # fix new start time
            t = time.time()

    # average times:
    if get_stats:
        print '\n\naverage times over {} iterations:'.format(
            iterations - iterations_trained)
        print 'computation time to train for 1 iteration (minimize disc and gen one step): t1 = {}'.format(
            np.mean(t1s))
        print 'computation time to compute the disc. and gen. loss once: t2 = {}'.format(
            np.mean(t2s))
        print 'computation time to compute x_generated: t3 = {}'.format(
            np.mean(t3s))
        if architecture == 'WGANGP':
            print 'computation time to compute gradient regularization term: t4 = {}'.format(
                np.mean(t4s))
        print 't1/t2 = {}'.format(np.mean(t1s) / np.mean(t2s))
        print

    # -------------------------------------------------------
    # after training close the session
    session.close()
    tf.reset_default_graph()

    # -------------------------------------------------------
    # when training is done send email
    # NOTE(review): the attached sample image is only written at iteration
    # numbers that are multiples of 100, so 'iteration_<iterations>.png' may
    # not exist for other values of `iterations` - confirm send_email copes.
    if send:
        subject = 'GAN (MNIST) partly fixed training finished'
        body = 'to download the results of this model use (in the terminal):\n\n'
        body += 'scp -r [email protected]:/cluster/home/fkrach/MasterThesis/MTCode1/' + directory + ' .'
        files = [
            directory + 'model_config.csv',
            directory + 'training_progress.csv',
            samples_dir + 'iteration_{}.png'.format(iterations)
        ]
        send_email.send_email(subject=subject, body=body, file_names=files)

    return directory