def load_mnist_data(dataset):
    """Load an MNIST archive, choosing the loader from the file extension.

    Args:
        dataset: Path of the archive. Paths ending in ``.npz`` are handed to
            ``load_mnist``; paths ending in ``.gz`` are handed to
            ``load_data``.

    Returns:
        Whatever the selected loader returns.

    Raises:
        AssertionError: If the path ends in neither ``.npz`` nor ``.gz``.
    """
    if dataset.endswith('.npz'):
        print('load npz')
        return load_mnist(dataset)

    # The original tested ``datasets`` (the not-yet-assigned result variable)
    # here, so every non-.npz path crashed before reaching the .gz loader.
    if dataset.endswith('.gz'):
        print('load gz')
        return load_data(dataset)

    message = 'can not load dataset: %s' % dataset
    print(message)
    # Raised explicitly instead of ``assert False`` so the failure survives
    # ``python -O`` while callers still see the same exception type.
    raise AssertionError(message)
def test():
    """Fit an ``autoAdapter`` on the MNIST and USPS datasets.

    USPS is loaded from the arrays returned by ``parse_data`` and MNIST via
    ``load_mnist`` (one-hot labels); both hold out 5000 validation samples.
    The adapter is then fitted with plotting enabled.
    """
    usps_inputs, usps_labels = parse_data()
    usps_data = load_usps(usps_inputs, usps_labels,
                          validation_size=5000, test_size=0)
    mnist_data = load_mnist(one_hot=True, validation_size=5000)

    adapter_settings = dict(
        input_dim=28 * 28,
        new_dim=100,
        n_classes=10,
        batch_size_src=128,
        batch_size_tar=128,
        training_steps=1000,
        lamb=0.01,
        kernel_type='linear',
        kernel_param={"alpha": 1.0, "d": 2, "c": 0.0},
        sigma_list=[50],
    )
    adapter = autoAdapter(**adapter_settings)
    adapter.fit(mnist_data, usps_data, onehot=True, plot=True)
def main():
    """Run ``L_layer_model`` three times over freshly shuffled MNIST data.

    The parameters start from ``initialize_parameters_deep`` for a
    784-256-64-10 network and are threaded through each call.
    """
    train_pairs, _unused_test_split = load_mnist()
    parameters = initialize_parameters_deep([784, 256, 64, 10])

    for _ in range(3):
        # Shuffle a copy so the loaded training data keeps its order.
        shuffled = list(train_pairs)
        random.shuffle(shuffled)
        inputs, labels = zip(*shuffled)
        parameters = L_layer_model(inputs, labels, parameters)
def test_dropout_ala_original():
    """Train a dropout network on MNIST with settings meant to reproduce
    the results of the original dropout papers by Hinton et al.

    Builds the SGD and network configuration dictionaries, loads the MNIST
    batches, constructs an ``EAR_NET`` on a symbolic input matrix,
    initialises its biases with ``b_init=0.0`` and hands it to ``train_mlp``.
    """
    # Optimisation schedule.
    sgd_params = {
        'start_rate': 0.1,
        'decay_rate': 0.998,
        'wt_norm_bound': 3.5,
        'epochs': 1000,
        'batch_size': 100,
        'result_tag': 'maxout',
    }

    # A single proto-network and a single spawn drawn from it.
    proto_config = [28 * 28, (400, 4), (400, 4), 11]
    spawn_config = {
        'proto_key': 0,
        'input_noise': 0.1,
        'bias_noise': 0.05,
        'do_dropout': True,
    }
    mlp_params = {
        'proto_configs': [proto_config],
        'spawn_configs': [spawn_config],
        'spawn_weights': [1.0],
        'ear_type': 2,
        'ear_lam': 0.0,
        'lam_l2a': 1e-3,
        'reg_all_obs': True,
    }

    # Fixed seed for this test.
    rng = np.random.RandomState(12345)

    # MNIST, loaded without zero-mean preprocessing.
    datasets = load_mnist('data/mnist_batches.npz', zero_mean=False)

    # Build the network on a symbolic input matrix.
    x_in = T.matrix('x_in')
    NET = EAR_NET(rng=rng, input=x_in, params=mlp_params)
    init_biases(NET, b_init=0.0)

    # NOTE(review): arguments are (NET, sgd_params, datasets); confirm this
    # matches the signature of the train_mlp that is in scope here.
    train_mlp(NET, sgd_params, datasets)
    return
def train_mlp(NET, mlp_params, sgd_params):
    """Load the MNIST batches and train ``NET`` through ``NT.train_mlp``.

    Args:
        NET: Network to train; its ``is_semisupervised`` flag is set to 0.
        mlp_params: Network parameters forwarded to ``NT.train_mlp``.
        sgd_params: Optimiser parameters forwarded to ``NT.train_mlp``.
    """
    datasets = load_mnist('data/mnist_batches.npz')

    # Marking the net as not semisupervised forces it to use all examples
    # when computing the DEV regularizer.
    NET.is_semisupervised = 0

    training_arguments = dict(
        NET=NET,
        mlp_params=mlp_params,
        sgd_params=sgd_params,
        datasets=datasets,
    )
    NT.train_mlp(**training_arguments)
    return
def train_mlp(NET, mlp_params, sgd_params):
    """Load the MNIST batches, train ``NET`` via ``NT.train_mlp``, return 1.

    Args:
        NET: Network to train; its ``is_semisupervised`` flag is set to 0.
        mlp_params: Network parameters forwarded to ``NT.train_mlp``.
        sgd_params: Optimiser parameters forwarded to ``NT.train_mlp``.

    Returns:
        The constant ``1`` once training has finished.
    """
    mnist_path = 'data/mnist_batches.npz'
    datasets = load_mnist(mnist_path)

    # Marking the net as not semisupervised forces it to use all examples
    # when computing the DEV regularizer.
    NET.is_semisupervised = 0

    NT.train_mlp(
        NET=NET,
        mlp_params=mlp_params,
        sgd_params=sgd_params,
        datasets=datasets,
    )
    return 1
def test_dropout_ala_original():
    """Train a dropout network on MNIST with settings meant to reproduce
    the results of the original dropout papers by Hinton et al.

    Assembles the SGD and network configuration, loads the MNIST batches,
    builds an ``EarNet`` on a symbolic input matrix, initialises its biases
    with ``b_init=0.0`` and passes it to ``train_mlp``.
    """
    # Optimisation schedule.
    sgd_params = dict(
        start_rate=0.1,
        decay_rate=0.998,
        wt_norm_bound=3.5,
        epochs=1000,
        batch_size=100,
        result_tag='maxout',
    )

    # One proto-network to spawn from, and the single spawn that uses it.
    proto_networks = [[28 * 28, (400, 4), (400, 4), 11]]
    spawn_networks = [dict(proto_key=0, input_noise=0.1,
                           bias_noise=0.05, do_dropout=True)]

    mlp_params = dict(
        proto_configs=proto_networks,
        spawn_configs=spawn_networks,
        spawn_weights=[1.0],
        ear_type=2,
        ear_lam=0.0,
        lam_l2a=1e-3,
        reg_all_obs=True,
    )

    # Fixed seed for this test.
    rng = np.random.RandomState(12345)

    # MNIST, loaded without zero-mean preprocessing.
    dataset = 'data/mnist_batches.npz'
    datasets = load_mnist(dataset, zero_mean=False)

    # Build the network on a symbolic input matrix.
    x_in = T.matrix('x_in')
    NET = EarNet(rng=rng, input=x_in, params=mlp_params)
    init_biases(NET, b_init=0.0)

    # NOTE(review): arguments are (NET, sgd_params, datasets); confirm this
    # matches the signature of the train_mlp that is in scope here.
    train_mlp(NET, sgd_params, datasets)
    return
def main():
    """Embed a subset of MNIST with SNE or t-SNE, per the module settings.

    Reads the module-level constants (``SEED``, ``CLASSES_TO_USE``,
    ``NUM_POINTS``, ``PERPLEXITY``, ``TSNE``, ...) and forwards them to
    ``load_mnist``, ``p_joint`` and ``estimate_sne``.
    """
    # Seeded generator so runs are reproducible.
    rng = np.random.RandomState(SEED)

    # Keep only the requested digit classes, NUM_POINTS samples in total.
    X, y = load_mnist('datasets/',
                      digits_to_keep=CLASSES_TO_USE,
                      N=NUM_POINTS)

    # Joint probabilities p_ij.
    P = p_joint(X, PERPLEXITY)

    # Choose the low-dimensional affinity and gradient functions.
    if TSNE:
        q_fn = q_tsne
    else:
        q_fn = q_joint
    if TSNE:
        grad_fn = tsne_grad
    else:
        grad_fn = symmetric_sne_grad

    Y = estimate_sne(X, y, P, rng,
                     num_iters=NUM_ITERS,
                     q_fn=q_fn,
                     grad_fn=grad_fn,
                     learning_rate=LEARNING_RATE,
                     momentum=MOMENTUM,
                     plot=NUM_PLOTS)
def main():
    """Run SNE on a small MNIST subset with hard-coded settings.

    Loads ``num_points`` samples of digits 0-3, derives per-row sigmas for
    the target perplexity, builds the joint probability matrix and calls
    ``estimate_sne`` with the module-level ``q_fn`` / ``grad_fn``.
    """
    # Run settings.
    num_points = 200
    classes_to_use = [0, 1, 2, 3]  # MNIST classes to use
    perplexity = 20
    seed = 1
    momentum = 0.9
    eta = 10.  # defined but not passed on below
    num_iters = 2000
    annealing = 0.
    pca_init = False

    # Seeded numpy RandomState (not passed to estimate_sne below).
    rng = np.random.RandomState(seed)

    # Load num_points samples of the selected digit classes.
    X, y = load_mnist('/home/liam/datasets/',
                      digits_to_keep=classes_to_use,
                      N=num_points)

    # Negative squared Euclidean distance matrix of the data.
    distances = neg_squared_euc_dists(X)

    # Per-row sigma that reaches the desired perplexity.
    sigmas = find_optimal_sigmas(distances, target_perplexity=perplexity)

    # Conditional probabilities from those sigmas, then the joint matrix.
    p_conditional = calc_prob_matrix(distances, sigmas)
    P = p_joint(p_conditional)

    sne_options = dict(
        num_iters=num_iters,
        q_fn=q_fn,
        grad_fn=grad_fn,
        pca_init=pca_init,
        momentum=momentum,
        annealing=annealing,
        plot=True,
    )
    estimate_sne(P, **sne_options)
# -*- coding: utf-8 -*- from __future__ import division import tensorflow as tf import numpy as np import os import shutil import time import load_data x_train, x_validation, x_test, y_train, y_validation, y_test = load_data.load_mnist('./data/mnist/', seed=0, as_image=True, scaling=True) BOARD_PATH = "./board/lab08-1_board" INPUT_DIM = np.size(x_train, 1) NCLASS = len(np.unique(y_train)) BATCH_SIZE = 32 TOTAL_EPOCH = 30 ALPHA = 0 ntrain = len(x_train) nvalidation = len(x_validation) ntest = len(x_test) image_width = np.size(x_train, 1) image_height = np.size(x_train, 2) print("\nThe number of train samples : ", ntrain) print("The number of validation samples : ", nvalidation)
# Optimisation settings handed to the trainer.
sgd_params = {}
sgd_params['start_rate'] = 0.25
sgd_params['decay_rate'] = 0.998
sgd_params['wt_norm_bound'] = 3.75
sgd_params['epochs'] = 1000
sgd_params['batch_size'] = 100
# Set parameters for the network to be trained
mlp_params = {}
mlp_params['layer_sizes'] = [28*28, 800, 800, 10]
mlp_params['lam_l2a'] = 1e-3
mlp_params['dev_clones'] = 1
mlp_params['dev_types'] = [1, 1, 2]
mlp_params['dev_lams'] = [0.1, 0.1, 2.0]
mlp_params['use_bias'] = 1
# Pick some data to train with
datasets = load_mnist('data/mnist_batches.npz')
#datasets = load_umontreal_data('data/mnist.pkl')
# Set the type of network to train, based on user input.
# Exactly two command-line arguments are required: the network type and a
# result tag; anything else prints the usage line and exits with status 1.
if (len(sys.argv) != 3):
    print "Usage: {0} [raw|sde|dev] [result_tag]".format(sys.argv[0])
    exit(1)
elif sys.argv[1] == "raw":
    sgd_params['mlp_type'] = 'raw'
    sgd_params['result_tag'] = sys.argv[2]
    # "raw" zeroes every DEV regularisation weight.
    mlp_params['dev_lams'] = [0.0 for l in mlp_params['dev_lams']]
elif sys.argv[1] == "sde":
    sgd_params['mlp_type'] = 'sde'
    sgd_params['result_tag'] = sys.argv[2]
elif sys.argv[1] == "dev":
    # NOTE(review): unlike the "raw" and "sde" branches, no 'result_tag' is
    # set here in the visible code -- this snippet may be truncated; confirm.
    sgd_params['mlp_type'] = 'dev'
from load_data import load_mnist

# Call the loader with its default arguments; the return value is discarded.
load_mnist()
def test_mlp(
        initial_learning_rate,
        learning_rate_decay,
        squared_filter_length_limit,
        n_epochs,
        batch_size,
        mom_params,
        activations,
        dropout,
        dropout_rates,
        results_file_name,
        layer_sizes,
        dataset,
        use_bias,
        random_seed=1234):
    """Train an MLP on MNIST with momentum SGD, optional dropout and a
    max-norm constraint on the columns of every weight matrix.

    The dataset is the one from the mlp demo on deeplearning.net. This
    training function is lifted from there almost exactly.

    :param initial_learning_rate: starting value of the shared learning rate
    :param learning_rate_decay: factor multiplied into the learning rate
        once after every epoch
    :param squared_filter_length_limit: squared upper bound on the L2 norm
        of each weight-matrix column
    :param n_epochs: number of training epochs
    :param batch_size: number of samples per minibatch
    :param mom_params: dict with keys "start", "end" and "interval"; the
        momentum moves linearly from start to end over "interval" epochs
    :param activations: forwarded to ``MLP``
    :param dropout: when truthy, train on the dropout cost
    :param dropout_rates: forwarded to ``MLP``; must hold one entry fewer
        than ``layer_sizes``
    :param results_file_name: file that receives one validation-error line
        per epoch
    :param layer_sizes: forwarded to ``MLP``
    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
        http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
    :param use_bias: forwarded to ``MLP``
    :param random_seed: seed of the numpy RandomState given to ``MLP``
    """
    assert len(layer_sizes) - 1 == len(dropout_rates)

    # extract the params for momentum
    mom_start = mom_params["start"]
    mom_end = mom_params["end"]
    mom_epoch_interval = mom_params["interval"]

    datasets = load_mnist(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    # (floor division keeps these integers on Python 2 and Python 3 alike)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    epoch = T.scalar()
    x = T.matrix('x')      # the data is presented as rasterized images
    y = T.ivector('y')     # the labels are presented as 1D vector of
                           # [int] labels
    learning_rate = theano.shared(np.asarray(initial_learning_rate,
                                             dtype=theano.config.floatX))

    rng = np.random.RandomState(random_seed)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x,
                     layer_sizes=layer_sizes,
                     dropout_rates=dropout_rates,
                     activations=activations,
                     use_bias=use_bias)

    # Build the expression for the cost function.
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)

    # Compile theano function for testing.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]})

    # Compile theano function for validation.
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # Use the right cost function here to train with or without dropout.
    training_cost = dropout_cost if dropout else cost

    # Compute gradients of the model wrt parameters
    gparams = [T.grad(training_cost, param) for param in classifier.params]

    # ... and allocate memory for momentum'd versions of the gradient
    gparams_mom = [
        theano.shared(np.zeros(param.get_value(borrow=True).shape,
                               dtype=theano.config.floatX))
        for param in classifier.params
    ]

    # Compute momentum for the current epoch
    mom = ifelse(epoch < mom_epoch_interval,
                 mom_start * (1.0 - epoch / mom_epoch_interval)
                 + mom_end * (epoch / mom_epoch_interval),
                 mom_end)

    # Update the step direction using momentum. The learning rate is folded
    # into the velocity, matching the update rule of Hinton's dropout paper.
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        updates[gparam_mom] = (mom * gparam_mom
                               - (1. - mom) * learning_rate * gparam)

    # ... and take a step along that direction
    for param, gparam_mom in zip(classifier.params, gparams_mom):
        # The learning rate is already part of the velocity, so the step is
        # a plain addition.
        stepped_param = param + updates[gparam_mom]

        # Only two-dimensional parameters (weight matrices) are constrained:
        # the norm of every COLUMN is clipped to the configured limit, see
        # https://github.com/BVLC/caffe/issues/109
        if param.get_value(borrow=True).ndim == 2:
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0,
                                   T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    # Compile theano function for training. This returns the training cost
    # and updates the model parameters.
    train_model = theano.function(
        inputs=[epoch, index],
        outputs=training_cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    # Theano function to decay the learning rate, this is separate from the
    # training function because we only want to do this once each epoch
    # instead of after each minibatch.
    decay_learning_rate = theano.function(
        inputs=[],
        outputs=learning_rate,
        updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    best_validation_errors = np.inf
    best_iter = 0
    test_score = 0.
    epoch_counter = 0
    # time.clock no longer exists on Python 3.8+; prefer perf_counter and
    # fall back to clock only where perf_counter is unavailable.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start_time = timer()

    # The context manager guarantees the results file is closed even when
    # training is interrupted.
    with open(results_file_name, 'wb') as results_file:
        while epoch_counter < n_epochs:
            # Train this epoch
            epoch_counter = epoch_counter + 1
            for minibatch_index in range(n_train_batches):
                train_model(epoch_counter, minibatch_index)

            # Compute loss on validation set
            validation_losses = [validate_model(i)
                                 for i in range(n_valid_batches)]
            this_validation_errors = np.sum(validation_losses)
            improved = this_validation_errors < best_validation_errors

            # Report and save progress.
            print("epoch {}, validation error {}, learning_rate={}{}".format(
                epoch_counter,
                this_validation_errors,
                learning_rate.get_value(borrow=True),
                " **" if improved else ""))

            if improved:
                # Remember where the best validation result occurred and
                # score the test set there, so the final summary reports
                # real numbers instead of the initial zeros.
                best_validation_errors = this_validation_errors
                best_iter = epoch_counter
                test_score = np.sum([test_model(i)
                                     for i in range(n_test_batches)])

            results_file.write(
                "{0}\n".format(this_validation_errors).encode('utf-8'))
            results_file.flush()

            decay_learning_rate()

    end_time = timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_errors * 100., best_iter, test_score * 100.))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
def test_mlp(initial_learning_rate,
             learning_rate_decay,
             squared_filter_length_limit,
             n_epochs,
             batch_size,
             mom_params,
             activations,
             dropout,
             dropout_rates,
             results_file_name,
             layer_sizes,
             dataset,
             use_bias,
             random_seed=1234):
    """Train an MLP on MNIST with momentum SGD, optional dropout and a
    max-norm constraint on the columns of every weight matrix.

    The dataset is the one from the mlp demo on deeplearning.net. This
    training function is lifted from there almost exactly.

    After every epoch the summed test-set error is printed and appended to
    the results file, then the learning rate is decayed.

    :param initial_learning_rate: starting value of the shared learning rate
    :param learning_rate_decay: factor multiplied into the learning rate
        once after every epoch
    :param squared_filter_length_limit: squared upper bound on the L2 norm
        of each weight-matrix column
    :param n_epochs: number of training epochs
    :param batch_size: number of samples per minibatch
    :param mom_params: dict with keys "start", "end" and "interval"; the
        momentum moves linearly from start to end over "interval" epochs
    :param activations: forwarded to ``MLP``
    :param dropout: when truthy, train on the dropout cost
    :param dropout_rates: forwarded to ``MLP``; must hold one entry fewer
        than ``layer_sizes``
    :param results_file_name: file that receives one error line per epoch
    :param layer_sizes: forwarded to ``MLP``
    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
        http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
    :param use_bias: forwarded to ``MLP``
    :param random_seed: seed of the numpy RandomState given to ``MLP``
    """
    assert len(layer_sizes) - 1 == len(dropout_rates)

    # extract the params for momentum
    mom_start = mom_params["start"]
    mom_end = mom_params["end"]
    mom_epoch_interval = mom_params["interval"]

    datasets = load_mnist(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training and testing; floor
    # division yields the integers that range() needs directly
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    epoch = T.scalar()
    x = T.matrix('x')      # the data is presented as rasterized images
    y = T.ivector('y')     # the labels are presented as 1D vector of
                           # [int] labels
    learning_rate = theano.shared(
        np.asarray(initial_learning_rate, dtype=theano.config.floatX))

    rng = np.random.RandomState(random_seed)

    # construct the MLP class
    classifier = MLP(rng=rng,
                     input=x,
                     layer_sizes=layer_sizes,
                     dropout_rates=dropout_rates,
                     activations=activations,
                     use_bias=use_bias)

    # Build the expression for the cost function.
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)

    # Compile theano function for testing.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Compile theano function for validation. It is not called in the
    # training loop below, which reports test error instead.
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Use the right cost function here to train with or without dropout.
    training_cost = dropout_cost if dropout else cost

    # Compute gradients of the model wrt parameters
    gparams = [T.grad(training_cost, param) for param in classifier.params]

    # ... and allocate memory for momentum'd versions of the gradient
    gparams_mom = [
        theano.shared(
            np.zeros(param.get_value(borrow=True).shape,
                     dtype=theano.config.floatX))
        for param in classifier.params
    ]

    # Compute momentum for the current epoch
    mom = ifelse(
        epoch < mom_epoch_interval,
        mom_start * (1.0 - epoch / mom_epoch_interval)
        + mom_end * (epoch / mom_epoch_interval),
        mom_end)

    # Update the step direction using momentum. The learning rate is folded
    # into the velocity, matching the update rule of Hinton's dropout paper.
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        updates[gparam_mom] = (mom * gparam_mom
                               - (1. - mom) * learning_rate * gparam)

    # ... and take a step along that direction
    for param, gparam_mom in zip(classifier.params, gparams_mom):
        # The learning rate is already part of the velocity, so the step is
        # a plain addition.
        stepped_param = param + updates[gparam_mom]

        # Only two-dimensional parameters (weight matrices) are constrained:
        # the norm of every COLUMN is clipped to the configured limit, see
        # https://github.com/BVLC/caffe/issues/109
        if param.get_value(borrow=True).ndim == 2:
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0,
                                   T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    # Compile theano function for training. This returns the training cost
    # and updates the model parameters.
    train_model = theano.function(
        inputs=[epoch, index],
        outputs=training_cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Theano function to decay the learning rate, this is separate from the
    # training function because we only want to do this once each epoch
    # instead of after each minibatch.
    decay_learning_rate = theano.function(
        inputs=[],
        outputs=learning_rate,
        updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    best_test_errors = np.inf
    epoch_counter = 0
    # time.clock() was removed in Python 3.8; perf_counter is the
    # documented replacement for measuring elapsed time.
    start_time = time.perf_counter()

    # The context manager guarantees the results file is closed even when
    # training is interrupted.
    with open(results_file_name, 'wb') as results_file:
        while epoch_counter < n_epochs:
            # Train this epoch
            epoch_counter = epoch_counter + 1
            for minibatch_index in range(n_train_batches):
                train_model(epoch_counter, minibatch_index)

            # Compute loss on the test set. test_model slices the test
            # data, so it must be driven by the number of *test* batches;
            # the original looped over the validation batch count.
            test_losses = [test_model(i) for i in range(n_test_batches)]
            this_test_errors = np.sum(test_losses)

            # Report and save progress.
            print("epoch {}, test error {}, learning_rate={}{}".format(
                epoch_counter,
                this_test_errors,
                learning_rate.get_value(borrow=True),
                " **" if this_test_errors < best_test_errors else ""))

            best_test_errors = min(best_test_errors, this_test_errors)
            results_file.write(
                bytes("{0}\n".format(this_test_errors), 'UTF-8'))
            results_file.flush()

            decay_learning_rate()

    end_time = time.perf_counter()
    s = 'The code for file ' + os.path.split(
        __file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)
    print(s, end="", file=sys.stderr)
def run(network_type='cnn',
        include_st=True,
        transformation=rotation,
        iterations=1.5e5,
        n_epochs=200,
        batch_size=256,
        learning_rate=0.01,
        decay_param=0.1,
        activation=T.nnet.relu,
        verbose=True,
        runallmode=False):
    """Train a fully connected or convolutional MNIST classifier,
    optionally preceded by a spatial transformer network.

    :param network_type: one of 'fcn' / 'FCN' / 'fully-connected' or
        'cnn' / 'CNN' / 'convolutional'; any other value builds nothing
    :param include_st: whether the first training run puts a spatial
        transformer in front of the classifier
    :param transformation: callable applied to the train, validation and
        test inputs before training
    :param iterations: accepted for compatibility; not referenced here
    :param n_epochs: forwarded to ``train_nn``
    :param batch_size: minibatch size
    :param learning_rate: step size of the plain gradient-descent update
    :param decay_param: accepted for compatibility; not referenced here
    :param activation: activation function of the fully connected network
    :param verbose: print the dataset sizes; also forwarded to ``train_nn``
    :param runallmode: when truthy, train twice, flipping ``include_st``
        for the second run
    """
    num_modes = 2 if runallmode else 1

    # load MNIST dataset from local directory
    print('...loading the dataset')
    dataset = load_mnist()

    # partition into relevant datasets
    train_set_x, train_set_y = dataset[0]
    valid_set_x, valid_set_y = dataset[1]
    test_set_x, test_set_y = dataset[2]

    # Get transformed dataset
    print('...Applying {0} to dataset'.format(transformation.__name__))
    train_set_x = transformation(train_set_x)
    valid_set_x = transformation(valid_set_x)
    test_set_x = transformation(test_set_x)

    # compute minibatches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # output dataset info
    if verbose:
        print('Current training data size is %i'
              % train_set_x.shape[0].eval())
        print('Current validation data size is %i'
              % valid_set_x.shape[0].eval())
        print('Current test data size is %i'
              % test_set_x.shape[0].eval())

    print('...building the model')
    rng = np.random.RandomState(23455)

    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar()

    ############################
    # FULLY CONNECTED NETWORK  #
    ############################
    fcn_params = {
        'input_dim': (batch_size, 1, 28, 28),
        'h_units': 128,
        'h_layers': 2,
        'theta_dim': 6,
        'L1': 0.00,
        'L2': 0.0001,
    }

    if network_type in ['fcn', 'FCN', 'fully-connected']:
        for _ in range(num_modes):
            print("...training fcn with include_st = {0}".format(include_st))

            # check if spatial transformer should be included
            if include_st:
                st = STN_FCN(
                    input_dim=fcn_params['input_dim'],
                    input=x.reshape((batch_size, 1, 28, 28))
                )
                # flatten the transformer output back to one row per image
                fcn_input = st.output
                fcn_input = fcn_input.reshape((batch_size, 28 * 28))
            else:
                fcn_input = x

            classifier = MultiLayerPerceptron(
                rng=rng,
                input=fcn_input,
                n_in=28 * 28,
                n_hidden=fcn_params['h_units'],
                n_out=10,
                n_hiddenLayers=fcn_params['h_layers'],
                act_function=activation
            )

            # cost to minimize during training
            cost = (classifier.negative_log_likelihood(y)
                    + fcn_params['L1'] * classifier.L1
                    + fcn_params['L2'] * classifier.L2_sqr)

            # testing
            test_model = theano.function(
                inputs=[index],
                outputs=classifier.errors(y),
                givens={
                    x: test_set_x[index * batch_size:(index + 1) * batch_size],
                    y: test_set_y[index * batch_size:(index + 1) * batch_size]
                }
            )

            # validation
            validate_model = theano.function(
                inputs=[index],
                outputs=classifier.errors(y),
                givens={
                    x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                    y: valid_set_y[index * batch_size:(index + 1) * batch_size]
                }
            )

            # the transformer's parameters are trained with the classifier
            if include_st:
                classifier.params = classifier.params + st.params

            # compute gradient of cost with respect to parameters
            gparams = [T.grad(cost, param) for param in classifier.params]

            # specify how to update parameter
            updates = [
                (param, param - learning_rate * gparam)
                for param, gparam in zip(classifier.params, gparams)
            ]

            # training
            train_model = theano.function(
                inputs=[index],
                outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size:(index + 1) * batch_size],
                    y: train_set_y[index * batch_size:(index + 1) * batch_size]
                }
            )

            print('...training')
            train_nn(train_model, validate_model, test_model,
                     n_train_batches, n_valid_batches, n_test_batches,
                     n_epochs, verbose)

            # a second pass (runallmode) trains the opposite configuration
            include_st = not include_st
            print("----------------------------------------------------------------------\n\n")

    ############################
    # CONVOLUTIONAL NETWORK    #
    ############################
    cnn_params = {
        'input_dim': (batch_size, 1, 28, 28),
        'filter': 32
    }

    if network_type in ['cnn', 'CNN', 'convolutional']:
        for _ in range(num_modes):
            print("...training cnn with include_st = {0}".format(include_st))

            if include_st:
                # Was a Python 2 print statement, which is a SyntaxError in
                # the Python 3 style used by the rest of this function.
                print("...Apply STN before CNN")
                st = STN_CNN(
                    input_dim=cnn_params['input_dim'],
                    img=x.reshape((batch_size, 1, 28, 28)),
                    nconvs=[20, 20],
                    downsampling=0.5,
                    scale=2
                )
                cnn_input = st.output
            else:
                cnn_input = x.reshape((batch_size, 1, 28, 28))

            # first convolutional pooling layer
            # filtering reduces to 20, maxpooling to 10
            # 4D output tensor is thus of shape (batch_size, 32, 10, 10)
            layer0 = ConvPoolLayer(
                rng=rng,
                input=cnn_input,
                image_shape=(batch_size, 1, 28, 28),
                filter_shape=(cnn_params['filter'], 1, 9, 9),
                poolsize=(2, 2)
            )

            # second convolutional pooling layer
            # filter reduces to 4, maxpooling to 2
            # 4D output tensor is thus of shape (batch_size, 32, 2, 2)
            layer1 = ConvPoolLayer(
                rng=rng,
                input=layer0.output,
                image_shape=(batch_size, cnn_params['filter'], 10, 10),
                filter_shape=(cnn_params['filter'], cnn_params['filter'], 7, 7),
                poolsize=(2, 2)
            )

            # classification
            layer2 = LogisticRegression(
                input=layer1.output.flatten(2),
                n_in=cnn_params['filter'] * 2 * 2,
                n_out=10
            )

            # cost we minimize during training
            cost = layer2.negative_log_likelihood(y)

            # testing
            test_model = theano.function(
                [index],
                layer2.errors(y),
                givens={
                    x: test_set_x[index * batch_size:(index + 1) * batch_size],
                    y: test_set_y[index * batch_size:(index + 1) * batch_size]
                }
            )

            # validation
            validate_model = theano.function(
                [index],
                layer2.errors(y),
                givens={
                    x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                    y: valid_set_y[index * batch_size:(index + 1) * batch_size]
                }
            )

            # list of model parameters to be fitted by gradient descent
            params = (layer2.params + layer1.params + layer0.params)
            if include_st:
                params += st.params

            # list of gradients for all model parameters
            grads = T.grad(cost, params)

            # specify how to update parameters
            updates = [
                (param_i, param_i - learning_rate * grad_i)
                for param_i, grad_i in zip(params, grads)
            ]

            # training
            train_model = theano.function(
                [index],
                cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size:(index + 1) * batch_size],
                    y: train_set_y[index * batch_size:(index + 1) * batch_size]
                }
            )

            print('...training')
            train_nn(train_model, validate_model, test_model,
                     n_train_batches, n_valid_batches, n_test_batches,
                     n_epochs, verbose)

            # a second pass (runallmode) trains the opposite configuration
            include_st = not include_st
            print("----------------------------------------------------------------------\n\n")
# choose the number of samples #ntrain = 1000 #ntest = 167 ntrain = 7500 ntest = 1250 # load the data train_signals_path = 'C:/Users/Adrien/Documents/Datasets/MNIST_spheres/signals/sphere_1502_ratio=2.000000_training_7500.txt' test_signals_path = 'C:/Users/Adrien/Documents/Datasets/MNIST_spheres/signals/sphere_1502_ratio=2.000000_test_1250.txt' train_labels_path = 'C:/Users/Adrien/Documents/Datasets/MNIST_spheres/labels/training_labels_7500.txt' test_labels_path = 'C:/Users/Adrien/Documents/Datasets/MNIST_spheres/labels/test_labels_1250.txt' (x_train, y_train), (x_test, y_test) = load_mnist(train_signals_path, test_signals_path, train_labels_path, test_labels_path, nv, ntrain, ntest) x_train = x_train.reshape(x_train.shape[0], nv, 1, 1) x_test = x_test.reshape(x_test.shape[0], nv, 1, 1) input_shape = (nv, 1, 1) # convert class vectors to binary class matrices y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) print(x_train.shape) print(x_test.shape)
from model import Model
from load_data import load_cifar, load_mnist, load_PA100K, load_PA100K_10
import time

print("Loading MNIST ...")
x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist = load_mnist()
print("Done")

# Training for the mnist dataset.
print("**************** Training on MNIST *****************")
model = Model()
model.train(x_train_mnist, y_train_mnist)
# The value returned by test() is intentionally discarded.
_ = model.test(x_test_mnist, y_test_mnist)

# To empty the RAM: drop the MNIST arrays and the model before the next
# dataset is loaded, then pause for five seconds.
del x_train_mnist
del y_train_mnist
del x_test_mnist
del y_test_mnist
del model
time.sleep(5)

print("Loading CIFAR-100 ...")
x_train_cifar, y_train_cifar, x_test_cifar, y_test_cifar = load_cifar()
print("Done")

# Training for the cifar images, with a fresh model.
print("**************** Training on CIFAR-100 *****************")
model = Model()
model.train(x_train_cifar, y_train_cifar)
def test_mlp(initial_learning_rate, learning_rate_decay,
             squared_filter_length_limit, n_epochs, batch_size, dropout,
             results_file_name, layer_sizes, dataset, use_bias):
    """
    Train an MLP on MNIST with momentum SGD, optional dropout and a norm
    constraint on the rows of every 2-D parameter.

    The dataset is the one from the mlp demo on deeplearning.net. This
    training function is lifted from there almost exactly.

    :param initial_learning_rate: learning rate used during the first epoch
    :param learning_rate_decay: factor the learning rate is multiplied by
        after every epoch
    :param squared_filter_length_limit: upper bound on the squared L2 norm of
        each row of a 2-D parameter; rows above it are rescaled
    :param n_epochs: number of passes over the training set
    :param batch_size: number of examples per minibatch
    :param dropout: when true, gradients are taken from the dropout cost
    :param results_file_name: path of the file that receives one validation
        error value per epoch
    :param layer_sizes: forwarded to ``MLP``
    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
        http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
    :param use_bias: forwarded to ``MLP``
    """
    datasets = load_mnist(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    # (floor division: a trailing partial batch is ignored)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    epoch = T.scalar()
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels
    learning_rate = theano.shared(
        np.asarray(initial_learning_rate, dtype=theano.config.floatX))

    rng = np.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, layer_sizes=layer_sizes,
                     use_bias=use_bias)

    # Build the expression for the cost function.
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)

    # Compile theano function for testing.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Compile theano function for validation.
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Use the right cost function here to train with or without dropout.
    training_cost = dropout_cost if dropout else cost

    # Compute gradients of the model wrt parameters
    gparams = [T.grad(training_cost, param) for param in classifier.params]

    # ... and allocate memory for momentum'd versions of the gradient
    gparams_mom = [
        theano.shared(np.zeros(param.get_value(borrow=True).shape,
                               dtype=theano.config.floatX))
        for param in classifier.params
    ]

    # Momentum ramps linearly from 0.5 to 0.99 over the first 500 epochs.
    mom = ifelse(epoch < 500,
                 0.5 * (1. - epoch / 500.) + 0.99 * (epoch / 500.),
                 0.99)

    # Updates are collected as an ordered list of (shared, expression) pairs;
    # a plain dict with several entries makes Theano warn about
    # non-deterministic update ordering.
    updates = []

    # Update the step direction using momentum
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        updates.append((gparam_mom, mom * gparam_mom + (1. - mom) * gparam))

    # ... and take a step along that direction
    for param, gparam_mom in zip(classifier.params, gparams_mom):
        stepped_param = param - (1. - mom) * learning_rate * gparam_mom

        # This is a silly hack to constrain the norms of the rows of the
        # weight matrices.  This just checks if there are two dimensions to
        # the parameter and constrains it if so.
        if param.get_value(borrow=True).ndim == 2:
            squared_norms = T.sum(stepped_param**2, axis=1).reshape(
                (stepped_param.shape[0], 1))
            scale = T.clip(
                T.sqrt(squared_filter_length_limit / squared_norms), 0., 1.)
            updates.append((param, stepped_param * scale))
        else:
            updates.append((param, stepped_param))

    # Compile theano function for training.  This returns the training cost
    # and updates the model parameters.
    train_model = theano.function(
        inputs=[epoch, index],
        outputs=training_cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Theano function to decay the learning rate, this is separate from the
    # training function because we only want to do this once each epoch
    # instead of after each minibatch.
    decay_learning_rate = theano.function(
        inputs=[],
        outputs=learning_rate,
        updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    best_validation_errors = np.inf
    best_iter = 0
    test_score = 0.
    epoch_counter = 0
    start_time = time.clock()

    results_file = open(results_file_name, 'wb')
    try:
        while epoch_counter < n_epochs:
            # Train this epoch
            epoch_counter = epoch_counter + 1
            for minibatch_index in xrange(n_train_batches):
                train_model(epoch_counter, minibatch_index)

            # Compute loss on validation set
            validation_losses = [
                validate_model(i) for i in xrange(n_valid_batches)
            ]
            this_validation_errors = np.sum(validation_losses)
            improved = this_validation_errors < best_validation_errors

            # Report and save progress.
            # NOTE(review): the value labelled "test error" is the validation
            # error; the label is kept in case logs are parsed downstream.
            print("epoch {}, test error {}, learning_rate={}{}".format(
                epoch_counter, this_validation_errors,
                learning_rate.get_value(borrow=True),
                " **" if improved else ""))

            if improved:
                # Remember where the best validation result occurred and the
                # matching test error (aggregated like the validation error)
                # so that the final report is meaningful.
                best_validation_errors = this_validation_errors
                best_iter = epoch_counter
                test_score = np.sum(
                    [test_model(i) for i in xrange(n_test_batches)])

            results_file.write("{0}\n".format(this_validation_errors))
            results_file.flush()

            decay_learning_rate()
    finally:
        # Release the results file even if training is interrupted.
        results_file.close()

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_errors * 100., best_iter, test_score * 100.))
    sys.stderr.write(('The code for file ' + os.path.split(__file__)[1] +
                      ' ran for %.2fm' % ((end_time - start_time) / 60.)) +
                     '\n')
if __name__ == '__main__':
    # save_log = 0

    # Feature switches, all enabled.
    L2 = drop = bnn = 1

    # Load the six arrays, then cut train/validation down to a fixed number
    # of examples per class.
    train_x, valid_x, test_x, train_y, valid_y, test_y = load_data.load_mnist()
    train_x, train_y = load_data.get_equal_each_class(
        n_classes=10, n_datapoints_each_class=100,
        data_x=train_x, data_y=train_y)
    valid_x, valid_y = load_data.get_equal_each_class(
        n_classes=10, n_datapoints_each_class=20,
        data_x=valid_x, data_y=valid_y)

    # Show the resulting shapes.
    for arr in (train_x, train_y, valid_x, valid_y):
        print(arr.shape)

    # Layer widths and the activation used after each layer
    # (None for the last one).
    network_architecture = [784, 100, 100, 10]
    act_functions = [tf.nn.softplus] * 2 + [None]

    display_step = 300
    batch_size = 100
def main(num_epochs=200, batch_norm=True):
    """Pretrain and then adversarially train a generator/discriminator pair.

    Three phases are run in order:

    1. "autoencoder": 10 epochs minimising the image reconstruction loss.
    2. "doubleencoder": 10 epochs minimising image plus representation
       reconstruction losses.
    3. adversarial training for ``num_epochs`` epochs; for each minibatch the
       discriminator is updated with probability 0.2, otherwise the generator.

    After every epoch of every phase, 20 generated samples are written to
    ``mnist_<i>.png`` in the current directory (overwriting earlier ones).

    :param num_epochs: number of adversarial training epochs
    :param batch_norm: forwarded to ``build_generator`` and
        ``build_discriminator``
    """
    from PIL import Image

    batch_size = 128
    noise_size = 10
    pretrain_epochs = 10
    n_samples = 20

    print('Loading data')
    datasets = load_mnist()
    train_x, train_y = datasets[0]
    valid_x, valid_y = datasets[1]
    # datasets[2] (the test split) is not used by this script.

    image_var = T.matrix('image')
    representation_var = T.matrix('representation')

    print("Building model")
    generator = build_generator(representation_var, batch_norm)
    discriminator = build_discriminator(image_var, batch_norm)

    # Column 0 of the discriminator output is the score; the remaining
    # columns are used as a representation that is fed back to the generator.
    real_out = lasagne.layers.get_output(discriminator)[:, 0]
    fake_out = lasagne.layers.get_output(
        discriminator, lasagne.layers.get_output(generator))[:, 0]
    return_representation = lasagne.layers.get_output(
        discriminator, lasagne.layers.get_output(generator))[:, 1:]
    return_image = lasagne.layers.get_output(
        generator, lasagne.layers.get_output(discriminator)[:, 1:])

    reconstruction_loss1 = ((return_image - image_var)**2).mean()
    reconstruction_loss2 = ((return_representation -
                             representation_var)**2).mean()
    reconstruction_loss = reconstruction_loss1 + reconstruction_loss2
    generator_loss = (fake_out**2).mean()
    discriminator_loss = (real_out**2).mean() + (
        (1.0 - fake_out)**2).mean() + 0.1 * discriminator.norm

    generator_params = lasagne.layers.get_all_params(generator,
                                                     trainable=True)
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)
    total_params = generator_params + discriminator_params

    autoencoder_updates = lasagne.updates.rmsprop(
        reconstruction_loss1, total_params, learning_rate=0.05)
    doubleencoder_updates = lasagne.updates.rmsprop(
        reconstruction_loss, total_params, learning_rate=0.05)
    generator_updates = lasagne.updates.sgd(
        generator_loss, generator_params, learning_rate=0.05)
    discriminator_updates = lasagne.updates.sgd(
        discriminator_loss, discriminator_params, learning_rate=0.05)

    print("Compiling functions")
    autoencoder_train = theano.function([image_var],
                                        reconstruction_loss1,
                                        updates=autoencoder_updates)
    doubleencoder_train = theano.function([representation_var, image_var],
                                          reconstruction_loss,
                                          updates=doubleencoder_updates)
    # The generator step depends on the noise only ...
    generator_train_fn = theano.function([representation_var],
                                         updates=generator_updates)
    # ... while the discriminator step needs noise and real images.
    discriminator_train_fn = theano.function([representation_var, image_var],
                                             updates=discriminator_updates)
    get_max_gradient = theano.function([], discriminator.max_gradient)
    gen_fn = theano.function([representation_var],
                             lasagne.layers.get_output(generator,
                                                       deterministic=True))
    get_real_score = theano.function([image_var], real_out.mean())
    get_fake_score = theano.function([representation_var], fake_out.mean())

    def sample_noise(n_rows):
        """Draw uniform noise of shape (n_rows, noise_size) as floatX."""
        return lasagne.utils.floatX(np.random.rand(n_rows, noise_size))

    def save_samples():
        """Generate n_samples images and save them as mnist_<i>.png."""
        samples = 255 * gen_fn(sample_noise(n_samples))
        for i in range(n_samples):
            array = np.array(samples[i]).reshape((28, 28))
            im = Image.fromarray(array).convert('L')
            im.save('mnist_' + str(i) + '.png')
        print('Images saved')

    print("Making autoencoder")
    for epoch in range(pretrain_epochs):
        print("Starting Epoch %d" % epoch)
        start_time = time.time()
        auto_cost = []
        for inputs, _targets in iterate_minibatches(train_x, train_y,
                                                    batch_size):
            auto_cost.append(autoencoder_train(inputs))

        # Then we print the results for this epoch (this phase has its own
        # epoch count, so report against it rather than num_epochs):
        print("Epoch {} of {} took {:.3f}s".format(epoch, pretrain_epochs,
                                                   time.time() - start_time))
        print("autoencoder loss: ", np.mean(auto_cost))

        # And finally, we plot some generated data
        save_samples()

    print("Making doubleencoder")
    for epoch in range(pretrain_epochs):
        print("Starting Epoch %d" % epoch)
        start_time = time.time()
        double_cost = []
        for inputs, _targets in iterate_minibatches(train_x, train_y,
                                                    batch_size):
            noise = sample_noise(batch_size)
            double_cost.append(doubleencoder_train(noise, inputs))

        print("Epoch {} of {} took {:.3f}s".format(epoch, pretrain_epochs,
                                                   time.time() - start_time))
        print("doubleencoder loss: ", np.mean(double_cost))

        save_samples()

    print("Training")
    for epoch in range(num_epochs):
        # Score real validation images and generated samples before training.
        real_sum = 0
        fake_sum = 0
        valid_batches = 0
        for inputs, _targets in iterate_minibatches(valid_x, valid_y,
                                                    batch_size):
            noise = sample_noise(batch_size)
            real_sum += get_real_score(inputs)
            fake_sum += get_fake_score(noise)
            valid_batches += 1
        real_score = real_sum / valid_batches
        fake_score = fake_sum / valid_batches
        print("real score: %f" % real_score)
        print("fake score: %f" % fake_score)
        print("max gradient: %f" % get_max_gradient())

        print("Starting Epoch %d" % epoch)
        start_time = time.time()
        for inputs, _targets in iterate_minibatches(train_x, train_y,
                                                    batch_size):
            noise = sample_noise(batch_size)
            if np.random.random() < 0.2:
                discriminator_train_fn(noise, inputs)
            else:
                # generator_train_fn was compiled with the noise as its only
                # input; passing the images as well raises a TypeError.
                generator_train_fn(noise)

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch, num_epochs,
                                                   time.time() - start_time))

        # And finally, we plot some generated data
        save_samples()
# (End of a function whose definition begins above this chunk.)
    return np.array(sigmas)


# Experiment configuration.
NUM_POINTS = 500
CLASSES_TO_PLOT = [0, 1]
PERPLEXITY = 5
SEED = 1
MOMENTUM = 0.0
ETA = 0.1

# numpy RandomState for reproducibility
rng = np.random.RandomState(SEED)

# Load the first 500 0's and 1's from the MNIST dataset
X, y = load_mnist('/home/liam/datasets/',
                  digits_to_keep=CLASSES_TO_PLOT,
                  N=NUM_POINTS)

# Alternative synthetic input, kept for reference:
# =============================================================================
# D = 100
# X = rng.normal(size=[NUM_POINTS, D]) / 10.
# X[:NUM_POINTS//2, :D//2] = X[:NUM_POINTS//2, :D//2] + 0.5
# X[NUM_POINTS//2:, D//2:] = X[NUM_POINTS//2:, D//2:] + 0.5
# y = np.zeros(X.shape[0])
# y[NUM_POINTS//2:] = 1
# =============================================================================

# Get the negative squared Euclidean distance from every data point to all
# others
#distances = -np.square(distance_matrix(X, X))
distances = neg_squared_euc_dists(X)

# Find optimal sigma for each row of this matrix, given our desired perplexity
def main_training(batch_size, num_classes, epochs, ntrain, ntest,
                  train_signals_path, train_labels_path, train_path_c,
                  train_path_t, test_signals_path, test_labels_path,
                  test_path_c, test_path_t, val_signal_path, train_nv,
                  test_nv, nrings, ndirs):
    """Train a GCNN on one shape and evaluate it on a second shape.

    The model is trained and validated on signals with ``train_nv`` values
    per sample, using the patch operators loaded from ``train_path_c`` /
    ``train_path_t``.  Its weights are then copied into a second model built
    with the operators from ``test_path_c`` / ``test_path_t`` and evaluated
    on the signals from ``val_signal_path`` (``test_nv`` values per sample,
    labels from ``test_labels_path``).

    :param batch_size: minibatch size for fit/evaluate
    :param num_classes: number of target classes
    :param epochs: number of training epochs
    :param ntrain: forwarded to ``load_mnist``
    :param ntest: forwarded to ``load_mnist`` and ``load_mnist_test``
    :param nrings: forwarded to ``GCNN2_sparse2`` as ``n_rings``
    :param ndirs: forwarded to ``GCNN2_sparse2`` as ``n_dirs``
    :return: always 0
    """
    # load the data
    (x_train, y_train), (x_test, y_test) = load_mnist(
        train_signals_path, test_signals_path, train_labels_path,
        test_labels_path, train_nv, ntrain, ntest)

    # One row per sample, plus a trailing singleton axis.
    x_train = x_train.reshape(x_train.shape[0], train_nv)
    x_train = np.expand_dims(x_train, axis=2)
    x_test = x_test.reshape(x_test.shape[0], train_nv)
    x_test = np.expand_dims(x_test, axis=2)

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    print(x_train.shape)
    print(x_test.shape)

    # Signals on the other shape.  The row count comes from x_val itself;
    # borrowing x_test's only works while both sets have the same size.
    (x_val, y_val) = load_mnist_test(val_signal_path, test_labels_path,
                                     test_nv, ntest)
    x_val = x_val.reshape(x_val.shape[0], test_nv)
    x_val = np.expand_dims(x_val, axis=2)
    y_val = keras.utils.to_categorical(y_val, num_classes)

    # load patch operators
    train_connectivity = load_dense_matrix(train_path_c)
    train_transport = load_dense_matrix(train_path_t)
    test_connectivity = load_dense_matrix(test_path_c)
    test_transport = load_dense_matrix(test_path_t)

    # setup architecture
    data_input, predictions = GCNN2_sparse2(nb_classes=num_classes,
                                            n_batch=batch_size,
                                            n_v=train_nv,
                                            n_dirs=ndirs,
                                            n_rings=nrings,
                                            connectivity=train_connectivity,
                                            transport=train_transport)

    # create the model
    model = Model(inputs=data_input, outputs=predictions)

    # choose the optimizer and loss
    optim = keras.optimizers.Adam()
    loss = keras.losses.categorical_crossentropy
    model.compile(loss=loss, optimizer=optim, metrics=['accuracy'])

    # train the model
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test))

    # test/evaluate on the shape used for training
    score = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0)
    print('Test loss same shape:', score[0])
    print('Test accuracy same shape:', score[1])

    def test_on_other_shape(connectivity_, transport_, signal, labels,
                            n_batch, n_v, n_dirs, n_rings):
        """Rebuild the net with other patch operators, reuse the trained
        weights and print loss/accuracy on ``signal``/``labels``."""
        test_data, test_pred = GCNN2_sparse2(nb_classes=num_classes,
                                             n_batch=n_batch,
                                             n_v=n_v,
                                             n_dirs=n_dirs,
                                             n_rings=n_rings,
                                             connectivity=connectivity_,
                                             transport=transport_)
        test_model = Model(inputs=test_data, outputs=test_pred)
        test_model.compile(loss=keras.losses.categorical_crossentropy,
                           optimizer=optim,
                           metrics=['accuracy'])
        test_model.set_weights(model.get_weights())
        my_score = test_model.evaluate(signal, labels,
                                       batch_size=n_batch, verbose=0)
        print('Test loss:', my_score[0])
        print('Test accuracy:', my_score[1])

    print('test: ')
    test_on_other_shape(connectivity_=test_connectivity,
                        transport_=test_transport,
                        signal=x_val,
                        labels=y_val,
                        n_batch=batch_size,
                        n_v=test_nv,
                        n_dirs=ndirs,
                        n_rings=nrings)
    return 0
import os
from load_data import load_mnist
from model import create_model
import tensorflow as tf

# Evaluate the most recent checkpoint on the 't10k' split stored in ./fashion.
test_images, test_labels = load_mnist('./fashion', kind='t10k')
model = create_model()

# Checkpoints are written as training_models/cp-<epoch>.ckpt; only the
# directory part is needed to look up the latest one.
checkpoint_path = 'training_models/cp-{epoch:04d}.ckpt'
checkpoint_dir = os.path.dirname(checkpoint_path)
latest = tf.train.latest_checkpoint(checkpoint_dir)
if latest:
    model.load_weights(latest)
else:
    # latest_checkpoint found nothing: make it obvious that the numbers
    # below come from freshly created weights rather than a trained model.
    print('Warning: no checkpoint found in {!r}; '
          'evaluating a model without restored weights.'.format(
              checkpoint_dir))

test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print('\nTest accuracy:', test_acc)
print('\nTest loss:', test_loss)
def _last_logged_epoch(results_filename):
    """Return the epoch number of the last "epoch N, ..." line, or -1."""
    with open(results_filename, 'rb') as log:
        lines = log.readlines()
    prefix = "epoch "
    # Walk backwards so trailing "NaN detected!" / "Done!" lines are skipped.
    for line in reversed(lines):
        if not line.startswith(prefix):
            continue
        idx_end = line.find(",", len(prefix) + 1)
        try:
            return int(line[len(prefix):idx_end])
        except ValueError:
            continue
    return -1


def _resampled_eval(eval_fn, n_batches, n_resamples):
    """Run eval_fn over all batches several times; return (loss, nb_errors).

    The loss is averaged over resamples and batches; the error count is
    averaged over resamples and summed over batches.
    """
    errors = np.zeros((n_batches, ))
    losses = np.zeros((n_batches, ))
    for _ in xrange(int(n_resamples)):
        batch_losses, batch_errors = zip(
            *[eval_fn(i) for i in xrange(n_batches)])
        errors += batch_errors
        losses += batch_losses
    mean_loss = np.sum(losses / n_resamples) / n_batches
    nb_errors = np.sum(errors / n_resamples)
    return mean_loss, nb_errors


def run_ResNet(dataset, depth, n_epochs, batch_size, lookahead, alpha0,
               experiment_dir, epsilon, random_seed, output_file_base_name,
               gradient_clipping=None, force=False,
               n_validation_resamples=3., n_test_resamples=5.):
    """Train a stick-breaking ResNet with early stopping and report test error.

    :param dataset: dataset path; must contain "mnist_plus_rot", "mnist" or
        "cifar10", which selects the loader and the input shape
    :param depth: forwarded to ``build_sb_resnet``
    :param n_epochs: maximum number of training epochs
    :param batch_size: minibatch size
    :param lookahead: stop once this many epochs pass without a new best
        validation error
    :param alpha0: ``beta`` argument of the KL terms
    :param experiment_dir: directory for the parameter pickle and results log
    :param epsilon: not used by this function
    :param random_seed: not used by this function
    :param output_file_base_name: suffix of the parameter and results files
    :param gradient_clipping: if given, gradients are rescaled so that their
        global norm does not exceed this value
    :param force: ignore existing parameters/results and start from scratch
    :param n_validation_resamples: number of stochastic validation passes
    :param n_test_resamples: number of stochastic test passes
    :raises ValueError: if ``dataset`` matches none of the known datasets
    """
    # LOAD DATA
    if "mnist_plus_rot" in dataset:
        datasets = load_mnist_w_rotations(dataset, flatten=False,
                                          split=(70000, 10000, 20000))
        dataset_name = "mnist_w_rotation"
        input_layer = InputLayer(shape=(None, 1, 28, 28))
        output_size = 10
    elif "mnist" in dataset:
        # We follow the approach used in [2] to split the MNIST dataset.
        datasets = load_mnist(dataset, flatten=False,
                              split=(45000, 5000, 10000))
        dataset_name = "mnist"
        input_layer = InputLayer(shape=(None, 1, 28, 28))
        output_size = 10
    elif "cifar10" in dataset:
        # We split the Cifar-10 dataset according to [2].
        datasets = load_cifar10(dataset, flatten=False,
                                split=(45000, 5000, 10000))
        dataset_name = "cifar10"
        input_layer = InputLayer(shape=(None, 3, 32, 32))
        output_size = 10
    else:
        raise ValueError("Unknown dataset: {!r}".format(dataset))

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    train_set_size = int(train_set_y.shape[0].eval())
    valid_set_size = int(valid_set_y.shape[0].eval())
    test_set_size = int(test_set_y.shape[0].eval())
    print('Dataset {} loaded ({:,}|{:,}|{:,})'.format(
        dataset_name, train_set_size, valid_set_size, test_set_size))

    # compute number of minibatches for training, validation and testing.
    # The float() matters: with two ints, Python 2 floors the quotient before
    # np.ceil sees it and the last partial batch would be dropped.
    n_train_batches = int(np.ceil(train_set_size / float(batch_size)))
    n_valid_batches = int(np.ceil(valid_set_size / float(batch_size)))
    n_test_batches = int(np.ceil(test_set_size / float(batch_size)))

    # BUILD MODEL
    print('Building the model ...')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    index.tag.test_value = 0
    x = T.tensor4('x')  # the data is presented as rasterized images
    y = T.vector('y')  # the labels are presented as 1D vector of [floatX] labels.

    # Test values are useful for debugging with THEANO_FLAGS="compute_test_value=warn"
    x.tag.test_value = train_set_x[:batch_size].eval()
    y.tag.test_value = train_set_y[:batch_size].eval()

    input_layer.input_var = x
    layers_per_phase = ((depth - 2) // 9) * 3
    network, infos = build_sb_resnet(input_layer, depth, output_size)
    print("Number of parameters in model: {:,}".format(
        lasagne.layers.count_params(network, trainable=True)))

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    ll_term = lasagne.objectives.categorical_crossentropy(
        prediction, T.cast(y, dtype="int32"))
    kl_term_1 = calc_kl_divergence(infos[0], alpha=1., beta=alpha0)
    kl_term_2 = calc_kl_divergence(infos[1], alpha=1., beta=alpha0)
    kl_term_3 = calc_kl_divergence(infos[2], alpha=1., beta=alpha0)
    kl_term = kl_term_1 + kl_term_2 + kl_term_3
    cost = T.mean(ll_term + kl_term)

    # Compute average number of layers that have a stick length >= 1% in each phase.
    avg_n_layers_phase1 = calc_avg_n_layers(infos[0])
    avg_n_layers_phase2 = calc_avg_n_layers(infos[1])
    avg_n_layers_phase3 = calc_avg_n_layers(infos[2])

    avg_kl_term_1 = T.mean(kl_term_1)
    avg_kl_term_2 = T.mean(kl_term_2)
    avg_kl_term_3 = T.mean(kl_term_3)

    params = lasagne.layers.get_all_params(network, trainable=True)

    # If params already exist and 'force' is False, reload parameters.
    params_pkl_filename = pjoin(
        experiment_dir,
        'conv_sb-resnet_params_' + output_file_base_name + '.pkl')
    print("Checking if '{}' already exists.".format(params_pkl_filename))
    params_file_exists = os.path.isfile(params_pkl_filename)
    if params_file_exists and not force:
        print("Yes! Reloading existing parameters and resuming training (use --force to overwrite).")
        # NOTE: unpickling executes arbitrary code; only point experiment_dir
        # at files this script wrote itself.
        with open(params_pkl_filename, 'rb') as params_file:
            last_params = cPickle.load(params_file)
        for param, last_param in zip(params, last_params):
            param.set_value(last_param)
    elif params_file_exists:
        print("Yes! but --force was used. Starting from scratch.")
    else:
        print("No! Starting from scratch.")

    # Keep the gradients in parameter order so the update graph is built
    # deterministically.
    gradients = OrderedDict(zip(params, T.grad(cost, params)))

    if gradient_clipping is not None:
        grad_norm = T.sqrt(sum(T.sqr(g).sum() for g in gradients.values()))
        # Note that rescaling is one if grad_norm <= threshold.
        rescaling = gradient_clipping / T.maximum(grad_norm,
                                                  gradient_clipping)
        gradients = OrderedDict(
            (param, gparam * rescaling)
            for param, gparam in gradients.items())

    updates = utils.get_adam_updates_from_gradients(gradients)

    # Compile theano function for training. This updates the model parameters
    # and returns the training nll term, kl term, and the avg. nb. of layers
    # used in each phase.
    print('Compiling train function ...')
    compiling_start = time.time()
    train_model = theano.function(
        inputs=[index],
        outputs=[
            ll_term.mean(), kl_term.mean(),
            avg_n_layers_phase1, avg_n_layers_phase2, avg_n_layers_phase3,
            avg_kl_term_1, avg_kl_term_2, avg_kl_term_3
        ],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    print("{:.2f}".format((time.time() - compiling_start) / 60.))

    # Create a loss expression for validation/testing
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss_expr = lasagne.objectives.categorical_crossentropy(
        test_prediction, T.cast(y, dtype="int32")).mean()
    test_error_expr = T.sum(T.neq(T.argmax(test_prediction, axis=1), y),
                            dtype=theano.config.floatX)

    print('Compiling valid function ...')
    compiling_start = time.time()
    valid_model = theano.function(
        inputs=[index],
        outputs=[test_loss_expr, test_error_expr],
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })
    print("{:.2f}".format((time.time() - compiling_start) / 60.))

    print('Compiling test function ...')
    compiling_start = time.time()
    test_model = theano.function(
        inputs=[index],
        outputs=[test_loss_expr, test_error_expr],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })
    print("{:.2f}".format((time.time() - compiling_start) / 60.))

    ###############
    # TRAIN MODEL #
    ###############
    print('Training for {} epochs ...'.format(n_epochs))

    best_params = None
    best_valid_error = np.inf
    best_iter = 0
    start_time = time.clock()

    results_filename = pjoin(
        experiment_dir,
        "conv_sb-resnet_results_" + output_file_base_name + ".txt")
    if os.path.isfile(results_filename) and not force:
        # Resume after the last epoch that made it into the log.
        start_epoch = _last_logged_epoch(results_filename) + 1
        results_file = open(results_filename, 'ab')
    else:
        start_epoch = 0
        results_file = open(results_filename, 'wb')

    for epoch_counter in range(start_epoch, n_epochs):
        # Train this epoch
        epoch_start_time = time.time()
        # Running sums of the eight train_model outputs, in output order:
        # nll, kl, n_layers per phase (3), kl per phase (3).
        totals = np.zeros(8)

        for minibatch_index in xrange(n_train_batches):
            batch_stats = [float(v) for v in train_model(minibatch_index)]
            (batch_loss, batch_kl, n_layers_1, n_layers_2, n_layers_3,
             kl_1, kl_2, kl_3) = batch_stats

            if minibatch_index % 1 == 0:
                results = "batch #{}-{}, avg n_layers per phase ({:.2f}|{:.2f}|{:.2f})/{}, training loss (nll) {:.4f}, training kl-div {:.4f} ({:.4f}|{:.4f}|{:.4f}), time {:.2f}m"
                results = results.format(
                    epoch_counter, minibatch_index,
                    n_layers_1, n_layers_2, n_layers_3, layers_per_phase,
                    batch_loss, batch_kl, kl_1, kl_2, kl_3,
                    (time.time() - epoch_start_time) / 60.)
                print(results)

            if np.isnan(batch_loss):
                msg = "NaN detected! Stopping."
                print(msg)
                results_file.write(msg + "\n")
                results_file.close()
                sys.exit(1)

            totals += batch_stats

        epoch_end_time = time.time()

        # Compute some infos about training.
        averages = totals / n_train_batches

        # Compute validation error --- sample multiple times to simulate
        # posterior predictive distribution
        valid_loss, valid_nb_errors = _resampled_eval(
            valid_model, n_valid_batches, n_validation_resamples)
        valid_error = valid_nb_errors / valid_set_size

        results = (
            "epoch {}, avg n_layers per phase ({:.2f}|{:.2f}|{:.2f})/{}, train loss (nll) {:.4f}, "
            "train kl-div {:.4f}, train kl-div per phase ({:.4f}|{:.4f}|{:.4f}), "
            "valid loss {:.4f}, valid error {:.2%} ({:,}), time {:.2f}m")

        stop_training = False
        if valid_error < best_valid_error:
            best_iter = epoch_counter
            best_valid_error = valid_error
            results += " **"

            # Save progression
            best_params = [param.get_value().copy() for param in params]
            with open(params_pkl_filename, 'wb') as params_file:
                cPickle.dump(best_params, params_file,
                             protocol=cPickle.HIGHEST_PROTOCOL)
        elif epoch_counter - best_iter > lookahead:
            stop_training = True

        # Report and save progress.
        results = results.format(
            epoch_counter, averages[2], averages[3], averages[4],
            layers_per_phase, averages[0], averages[1],
            averages[5], averages[6], averages[7],
            valid_loss, valid_error, valid_nb_errors,
            (epoch_end_time - epoch_start_time) / 60)
        print(results)
        results_file.write(results + "\n")
        results_file.flush()

        if stop_training:
            break

    end_time = time.clock()

    # Reload best model.  best_params stays None when no epoch was run (e.g.
    # a resumed run that had already reached n_epochs); the parameters
    # loaded from disk are then evaluated as they are.
    if best_params is not None:
        for param, best_param in zip(params, best_params):
            param.set_value(best_param)

    # Compute test error --- sample multiple times to simulate posterior
    # predictive distribution
    test_loss, test_nb_errors = _resampled_eval(test_model, n_test_batches,
                                                n_test_resamples)
    test_error = test_nb_errors / test_set_size

    results = "Done! best epoch {}, test loss {:.4f}, test error {:.2%} ({:,}), training time {:.2f}m"
    results = results.format(best_iter, test_loss, test_error,
                             test_nb_errors, (end_time - start_time) / 60)
    print(results)
    results_file.write(results + "\n")
    results_file.close()

    sys.stderr.write(('The code for file ' + os.path.split(__file__)[1] +
                      ' ran for %.2fm' % ((end_time - start_time) / 60.)) +
                     '\n')
def main(model='standard', n_epochs=100):
    """Train a CNN on MNIST with plain SGD and report test loss/accuracy.

    :param model: ``'standard'`` or ``'maxout'``; selects the network builder
    :param n_epochs: number of passes over the training set
    :raises ValueError: if ``model`` is not one of the supported names
    """
    minibatch_size = 500

    print('Loading data')
    datasets = load_mnist()
    train_x, train_y = datasets[0]
    valid_x, valid_y = datasets[1]
    test_x, test_y = datasets[2]

    input_var = T.matrix('inputs')
    target_var = T.matrix('targets')

    print("Building model")
    if model == 'standard':
        network = build_standard_cnn(input_var)
    elif model == 'maxout':
        network = build_maxout_cnn(input_var)
    else:
        raise ValueError(
            "model must be 'standard' or 'maxout', got {!r}".format(model))

    # The maximum-gradient probe is only reported for the maxout network, so
    # only compile it (and only touch network.max_gradient) in that case.
    max_gradient = None
    if model == 'maxout':
        max_gradient = theano.function([], network.max_gradient)

    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction,
                                                       target_var)
    loss = loss.mean()

    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.sgd(loss, params, learning_rate=0.005)

    # Deterministic pass for validation/testing; targets are compared by
    # argmax, i.e. they are expected as one row per example.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()
    test_acc_expr = T.mean(T.eq(T.argmax(test_prediction, axis=1),
                                T.argmax(target_var, axis=1)),
                           dtype=theano.config.floatX)

    print("Compiling Functions")
    train_fn = theano.function([input_var, target_var], loss,
                               updates=updates)
    val_fn = theano.function([input_var, target_var],
                             [test_loss, test_acc_expr])

    def evaluate(data_x, data_y):
        """Return (mean loss, mean accuracy) over all minibatches."""
        total_err = 0
        total_acc = 0
        n_batches = 0
        for inputs, targets in iterate_minibatches(data_x, data_y,
                                                   minibatch_size):
            err, acc = val_fn(inputs, targets)
            total_err += err
            total_acc += acc
            n_batches += 1
        return total_err / n_batches, total_acc / n_batches

    print('Training')
    for epoch in range(n_epochs):
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for inputs, targets in iterate_minibatches(train_x, train_y,
                                                   minibatch_size):
            train_err += train_fn(inputs, targets)
            train_batches += 1

        val_err, val_acc = evaluate(valid_x, valid_y)

        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, n_epochs,
                                                   time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" validation loss:\t\t{:.6f}".format(val_err))
        print(" validation accuracy:\t\t{:.2f} %".format(val_acc * 100))
        if max_gradient is not None:
            print(" maximum gradient:\t\t{:.2f}".format(1.0 * max_gradient()))

    final_err, final_acc = evaluate(test_x, test_y)
    print("Final results:")
    print(" test loss:\t\t\t{:.6f}".format(final_err))
    print(" test accuracy:\t\t{:.2f} %".format(final_acc * 100))
# Optimisation settings.
sgd_params = {
    'start_rate': 0.25,
    'decay_rate': 0.998,
    'wt_norm_bound': 3.75,
    'epochs': 1000,
    'batch_size': 100,
}

# Set parameters for the network to be trained
mlp_params = {
    'layer_sizes': [28 * 28, 800, 800, 10],
    'lam_l2a': 1e-3,
    'dev_clones': 1,
    'dev_types': [1, 1, 2],
    'dev_lams': [0.1, 0.1, 2.0],
    'use_bias': 1,
}

# Pick some data to train with
datasets = load_mnist('data/mnist_batches.npz')
#datasets = load_umontreal_data('data/mnist.pkl')

# Set the type of network to train, based on user input:
#   argv[1] is the network type, argv[2] the tag for the result files.
if len(sys.argv) != 3:
    print("Usage: {0} [raw|sde|dev] [result_tag]".format(sys.argv[0]))
    exit(1)
elif sys.argv[1] == "raw":
    sgd_params['mlp_type'] = 'raw'
    sgd_params['result_tag'] = sys.argv[2]
    # Raw training: one zero weight per entry of dev_lams.
    mlp_params['dev_lams'] = [0.0] * len(mlp_params['dev_lams'])
elif sys.argv[1] == "sde":
    sgd_params['mlp_type'] = 'sde'
    sgd_params['result_tag'] = sys.argv[2]
elif sys.argv[1] == "dev":
    sgd_params['mlp_type'] = 'dev'
from sklearn.metrics import zero_one_loss
from sklearn import preprocessing
# NOTE(review): sklearn.cross_validation only exists in old scikit-learn
# releases -- confirm the pinned version before upgrading.
import sklearn.cross_validation as cv
from sklearn import linear_model
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import SGDClassifier
from load_data import convert_binary, load_usps
from mylasso import *
# NOTE(review): convert_binary is imported from load_data a second time
# here; redundant but harmless.
from load_data import load_mnist, convert_binary
""" experiments on relation between sparse pattern and regularization weight lmda """
data = load_mnist()
# Arguments handed to convert_binary; presumably the two digit classes
# that form the binary problem -- TODO confirm against load_data.
pos_ind = 6
neg_ind = 5
sig_D = 100
# lmda_list = [0.0005, 0.001, 0.01, 0.1, 0.3]
x, y = convert_binary(data, pos_ind, neg_ind)
# n, p are the two dimensions of x.
n, p = x.shape
x = x.astype(float)
y = y.astype(float)
# Rescale the features of x into the range [0, 1].
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
x = min_max_scaler.fit_transform(x)
# xtest = min_max_scaler.transform(x)
# ntrain = ytrain.size
# Lasso path over 5 regularisation weights, fitted without an intercept.
# NOTE(review): `return_models` is not accepted by recent scikit-learn
# releases -- verify against the installed version.
alphas, coefs, gaps = linear_model.lasso_path(x, y, n_alphas=5, return_models=False, fit_intercept=False)
# Regularisation weights in the reverse of the order lasso_path returned.
lmda_list = alphas[::-1]
batch_size_activation, batch_size_sgd, work_id) return result_list if __name__ == "__main__": data = "cifar10" if data == "cifar10": x_train, x_test, y_train, y_test = load_cifar() elif data == "cifar100": x_train, x_test, y_train, y_test = load_cifar_100() elif data == "mnist": x_train, x_test, y_train, y_test = load_mnist() num_processes = 1 start = timer() pair_list = [pair for pair in range(num_processes)] results = crossover_offspring(data, x_train, y_train, x_test, y_test, pair_list) pickle.dump(results, open("crossover_results.pickle", "wb")) end = timer() print(end - start)
from preprocessing import MakeOneHot
from load_data import load_mnist
from nn.model import Model
from nn.layer import FC, Softmax, ReLU, Dropout
from nn.optimizer import Adam, SGD
from nn.loss import cross_entropy
from nn.metrix import accuracy


def _flatten_and_scale(images):
    """Reshape ``images`` to one row per sample and divide by 255."""
    n_samples = images.shape[0]
    return images.reshape((n_samples, -1)) / 255


# Data: flattened, scaled inputs and one-hot targets. The encoder is fitted
# on the training labels and then reused for the test labels.
(X_train, y_train), (X_test, y_test) = load_mnist()
X_train = _flatten_and_scale(X_train)
X_test = _flatten_and_scale(X_test)
transformer = MakeOneHot()
y_train = transformer.fit_transform(y_train)
y_test = transformer.transform(y_test)

# Network: three FC -> ReLU -> Dropout(0.5) stages (500, 150, 50 units),
# followed by a 10-unit FC layer and a softmax. Only the first FC layer is
# given an explicit input_shape.
model = Model()
hidden_stages = ((500, {'input_shape': 784}), (150, {}), (50, {}))
for width, fc_kwargs in hidden_stages:
    model.add(FC(width, **fc_kwargs))
    model.add(ReLU())
    model.add(Dropout(0.5))
model.add(FC(10))
model.add(Softmax())
model.compile(Adam(eta=0.01), cross_entropy, accuracy)
import sklearn from ipywidgets import interact from load_data import load_mnist # Plot figures so that they can be shown in the notebook get_ipython().run_line_magic('matplotlib', 'inline') get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'svg'") # The next cell loads the MNIST digits dataset. # In[3]: from load_data import load_mnist MNIST = load_mnist('./') images = MNIST['data'].astype(np.double) labels = MNIST['target'].astype(np.int) # For this assignment, you need to implement the two functions (`distance` and `angle`) in the cell below which compute the distance and angle between two vectors. # ### Distances # In[6]: # GRADED FUNCTION: DO NOT EDIT THIS LINE def distance(x0, x1): """Compute distance between two vectors x0, x1 using the dot product.
import tensorflow as tf
import numpy as np
import os
import shutil
import load_data
import time

# Load the six train / validation / test arrays from ./data/mnist/.
x_train, x_validation, x_test, y_train, y_validation, y_test \
    = load_data.load_mnist('./data/mnist/', seed = 0, as_image = False, scaling = True)

# presumably the TensorBoard log directory -- TODO confirm against its use
BOARD_PATH = "./board/lab06-5_board"
# Number of elements along axis 1 of x_train.
INPUT_DIM = np.size(x_train, 1)
# Number of distinct values found in y_train.
NCLASS = len(np.unique(y_train))
BATCH_SIZE = 32
TOTAL_EPOCH = 30

# Length of each split, reported below.
ntrain = len(x_train)
nvalidation = len(x_validation)
ntest = len(x_test)
print("The number of train samples : ", ntrain)
print("The number of validation samples : ", nvalidation)
print("The number of test samples : ", ntest)

def linear(x, output_dim, name):
    # Variables are created under the scope `name`.
    with tf.variable_scope(name):
        # W is requested with shape [last dimension of x, output_dim].
        W = tf.get_variable(name='W', shape=[x.get_shape()[-1], output_dim], dtype=tf.float32,
if __name__ == '__main__': model_save_file = None model_save = {} test_classifiers = ['NB', 'KNN', 'LR', 'RF', 'DT', 'SVM', 'GBDT'] classifiers = {'NB': naive_bayes_classifier, 'KNN': knn_classifier, 'LR': logistic_regression_classifier, 'RF': random_forest_classifier, 'DT': decision_tree_classifier, 'SVM': svm_classifier, 'SVMCV': svm_cross_validation, 'GBDT': gradient_boosting_classifier } print('reading training and testing data...') X_train, y_train = load_mnist('E:\document\mnist', kind='train') X_test, y_test = load_mnist('E:\document\mnist', kind='t10k') num_train, num_feature = X_train.shape num_test, num_feature_y = X_test.shape is_binary_class = (len(np.unique(y_train)) == 2) print('data info *********************') print('#training data: {0}, #testing_data: {1}, dimension: {2}'.format(num_train, num_test, num_feature)) COMPONENT_NUM = 35 # pca 降维 print('----------------------------PCA-------------------') pca = PCA(n_components=COMPONENT_NUM) pca.fit(X_train) # Fit the model with X X_train = pca.transform(X_train) # Fit the model with X and 在X上完成降维. X_test = pca.transform(X_test) for classifier in test_classifiers:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy
import sklearn
from ipywidgets import interact
from load_data import load_mnist

# The next cell loads the MNIST digits dataset.

# In[2]:

MNIST = load_mnist()
images = MNIST['data'].astype(np.double)
# Builtin int is what the removed `np.int` alias pointed to; using it
# directly keeps the dtype and works on NumPy >= 1.24.
labels = MNIST['target'].astype(int)

# In[3]:

# Plot figures so that they can be shown in the notebook
get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'svg'")

# For this assignment, you need to implement the two functions (`distance` and `angle`) in the cell below which compute the distance and angle between two vectors.

# In[4]:
import load_data as ld
import kNbr as knn

# Root data folder; each split is read from its own sub-folder.
path = 'F:\\Study\\Projects\\MNIST\\Data\\'
train_dir = path + 'train\\'
test_dir = path + 'test\\'

X_train, Y_train = ld.load_mnist(train_dir, 'train')
X_test, Y_test = ld.load_mnist(test_dir, 'test')
# Report how many training samples and labels were loaded.
print(len(X_train), len(Y_train))

#### K Nearest Neighbours
## K Nearest Neighbours with L2 Norm and no deskewing
model, Y_pred = knn.simpleEuclideanL2(X_train, Y_train, X_test)