def run_cnn( arch_params, optimization_params , data_params, filename_params, visual_params, verbose = False, ): ##################### # Unpack Variables # ##################### results_file_name = filename_params [ "results_file_name" ] # Files that will be saved down on completion Can be used by the parse.m file error_file_name = filename_params [ "error_file_name" ] cost_file_name = filename_params [ "cost_file_name" ] confusion_file_name = filename_params [ "confusion_file_name" ] network_save_name = filename_params [ "network_save_name" ] dataset = data_params [ "loc" ] height = data_params [ "height" ] width = data_params [ "width" ] batch_size = data_params [ "batch_size" ] load_batches = data_params [ "load_batches" ] * batch_size batches2train = data_params [ "batches2train" ] batches2test = data_params [ "batches2test" ] batches2validate = data_params [ "batches2validate" ] channels = data_params [ "channels" ] mom_start = optimization_params [ "mom_start" ] mom_end = optimization_params [ "mom_end" ] mom_epoch_interval = optimization_params [ "mom_interval" ] mom_type = optimization_params [ "mom_type" ] initial_learning_rate = optimization_params [ "initial_learning_rate" ] learning_rate_decay = optimization_params [ "learning_rate_decay" ] ada_grad = optimization_params [ "ada_grad" ] fudge_factor = optimization_params [ "fudge_factor" ] l1_reg = optimization_params [ "l1_reg" ] l2_reg = optimization_params [ "l2_reg" ] rms_prop = optimization_params [ "rms_prop" ] rms_rho = optimization_params [ "rms_rho" ] rms_epsilon = optimization_params [ "rms_epsilon" ] squared_filter_length_limit = arch_params [ "squared_filter_length_limit" ] n_epochs = arch_params [ "n_epochs" ] validate_after_epochs = arch_params [ "validate_after_epochs" ] mlp_activations = arch_params [ "mlp_activations" ] cnn_activations = arch_params [ "cnn_activations" ] dropout = arch_params [ "dropout" ] column_norm = arch_params [ "column_norm" ] dropout_rates = arch_params [ "dropout_rates" ] nkerns = arch_params [ "nkerns" ] outs = arch_params [ "outs" ] filter_size = arch_params [ "filter_size" ] pooling_size = arch_params [ "pooling_size" ] num_nodes = arch_params [ "num_nodes" ] use_bias = arch_params [ "use_bias" ] random_seed = arch_params [ "random_seed" ] svm_flag = arch_params [ "svm_flag" ] visualize_flag = visual_params ["visualize_flag" ] visualize_after_epochs = visual_params ["visualize_after_epochs" ] n_visual_images = visual_params ["n_visual_images" ] display_flag = visual_params ["display_flag" ] # Random seed initialization. rng = numpy.random.RandomState(random_seed) ################# # Data Loading # ################# print "... loading data" # load matlab files as dataset. if data_params["type"] == 'mat': train_data_x, train_data_y, train_data_y1 = load_data_mat(dataset, batch = 1 , type_set = 'train') test_data_x, test_data_y, valid_data_y1 = load_data_mat(dataset, batch = 1 , type_set = 'test') # Load dataset for first epoch. valid_data_x, valid_data_y, test_data_y1 = load_data_mat(dataset, batch = 1 , type_set = 'valid') # Load dataset for first epoch. train_set_x = theano.shared(numpy.asarray(train_data_x, dtype=theano.config.floatX), borrow=True) train_set_y = theano.shared(numpy.asarray(train_data_y, dtype='int32'), borrow=True) train_set_y1 = theano.shared(numpy.asarray(train_data_y1, dtype=theano.config.floatX), borrow=True) test_set_x = theano.shared(numpy.asarray(test_data_x, dtype=theano.config.floatX), borrow=True) test_set_y = theano.shared(numpy.asarray(test_data_y, dtype='int32'), borrow=True) test_set_y1 = theano.shared(numpy.asarray(test_data_y1, dtype=theano.config.floatX), borrow=True) valid_set_x = theano.shared(numpy.asarray(valid_data_x, dtype=theano.config.floatX), borrow=True) valid_set_y = theano.shared(numpy.asarray(valid_data_y, dtype='int32'), borrow=True) valid_set_y1 = theano.shared(numpy.asarray(valid_data_y1, dtype=theano.config.floatX), borrow=True) # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size multi_load = True # load pkl data as is shown in theano tutorials elif data_params["type"] == 'pkl': data = load_data_pkl(dataset) train_set_x, train_set_y, train_set_y1 = data[0] valid_set_x, valid_set_y, valid_set_y1 = data[1] test_set_x, test_set_y, test_set_y1 = data[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size n_train_images = train_set_x.get_value(borrow=True).shape[0] n_test_images = test_set_x.get_value(borrow=True).shape[0] n_valid_images = valid_set_x.get_value(borrow=True).shape[0] n_train_batches_all = n_train_images / batch_size n_test_batches_all = n_test_images / batch_size n_valid_batches_all = n_valid_images / batch_size if (n_train_batches_all < batches2train) or (n_test_batches_all < batches2test) or (n_valid_batches_all < batches2validate): # You can't have so many batches. print "... !! Dataset doens't have so many batches. " raise AssertionError() multi_load = False # load skdata ( its a good library that has a lot of datasets) elif data_params["type"] == 'skdata': if (dataset == 'mnist' or dataset == 'mnist_noise1' or dataset == 'mnist_noise2' or dataset == 'mnist_noise3' or dataset == 'mnist_noise4' or dataset == 'mnist_noise5' or dataset == 'mnist_noise6' or dataset == 'mnist_bg_images' or dataset == 'mnist_bg_rand' or dataset == 'mnist_rotated' or dataset == 'mnist_rotated_bg') : print "... importing " + dataset + " from skdata" func = globals()['load_skdata_' + dataset] data = func() train_set_x, train_set_y, train_set_y1 = data[0] valid_set_x, valid_set_y, valid_set_y1 = data[1] test_set_x, test_set_y, test_set_y1 = data[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size n_train_images = train_set_x.get_value(borrow=True).shape[0] n_test_images = test_set_x.get_value(borrow=True).shape[0] n_valid_images = valid_set_x.get_value(borrow=True).shape[0] n_train_batches_all = n_train_images / batch_size n_test_batches_all = n_test_images / batch_size n_valid_batches_all = n_valid_images / batch_size if (n_train_batches_all < batches2train) or (n_test_batches_all < batches2test) or (n_valid_batches_all < batches2validate): # You can't have so many batches. print "... !! Dataset doens't have so many batches. " raise AssertionError() multi_load = False elif dataset == 'cifar10': print "... importing cifar 10 from skdata" data = load_skdata_cifar10() train_set_x, train_set_y, train_set_y1 = data[0] valid_set_x, valid_set_y, valid_set_y1 = data[1] test_set_x, test_set_y, test_set_y1 = data[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size multi_load = False elif dataset == 'caltech101': print "... importing caltech 101 from skdata" # shuffle the data total_images_in_dataset = 9144 rand_perm = numpy.random.permutation(total_images_in_dataset) # create a constant shuffle, so that data can be loaded in batchmode with the same random shuffle n_train_images = total_images_in_dataset / 3 n_test_images = total_images_in_dataset / 3 n_valid_images = total_images_in_dataset / 3 n_train_batches_all = n_train_images / batch_size n_test_batches_all = n_test_images / batch_size n_valid_batches_all = n_valid_images / batch_size if (n_train_batches_all < batches2train) or (n_test_batches_all < batches2test) or (n_valid_batches_all < batches2validate): # You can't have so many batches. print "... !! Dataset doens't have so many batches. " raise AssertionError() train_data_x, train_data_y = load_skdata_caltech101(batch_size = load_batches, rand_perm = rand_perm, batch = 1 , type_set = 'train' , height = height, width = width) test_data_x, test_data_y = load_skdata_caltech101(batch_size = load_batches, rand_perm = rand_perm, batch = 1 , type_set = 'test' , height = height, width = width) # Load dataset for first epoch. valid_data_x, valid_data_y = load_skdata_caltech101(batch_size = load_batches, rand_perm = rand_perm, batch = 1 , type_set = 'valid' , height = height, width = width) # Load dataset for first epoch. train_set_x = theano.shared(train_data_x, borrow=True) train_set_y = theano.shared(train_data_y, borrow=True) test_set_x = theano.shared(test_data_x, borrow=True) test_set_y = theano.shared(test_data_y, borrow=True) valid_set_x = theano.shared(valid_data_x, borrow=True) valid_set_y = theano.shared(valid_data_y, borrow=True) # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size multi_load = True # Just checking as a way to see if the intended dataset is indeed loaded. assert height*width*channels == train_set_x.get_value( borrow = True ).shape[1] assert batch_size >= n_visual_images if ada_grad is True: assert rms_prop is False elif rms_prop is True: assert ada_grad is False fudge_factor = rms_epsilon ###################### # BUILD NETWORK # ###################### print '... building the network' start_time = time.clock() # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of [int] if svm_flag is True: y1 = T.matrix('y1') # [-1 , 1] labels in case of SVM first_layer_input = x.reshape((batch_size, channels, height, width)) # Create first convolutional - pooling layers activity = [] # to record Cnn activities weights = [] conv_layers=[] filt_size = filter_size[0] pool_size = pooling_size[0] if not nkerns == []: conv_layers.append ( LeNetConvPoolLayer( rng, input = first_layer_input, image_shape=(batch_size, channels , height, width), filter_shape=(nkerns[0], channels , filt_size, filt_size), poolsize=(pool_size, pool_size), activation = cnn_activations[0], verbose = verbose ) ) activity.append ( conv_layers[-1].output ) weights.append ( conv_layers[-1].filter_img) # Create the rest of the convolutional - pooling layers in a loop next_in_1 = ( height - filt_size + 1 ) / pool_size next_in_2 = ( width - filt_size + 1 ) / pool_size for layer in xrange(len(nkerns)-1): filt_size = filter_size[layer+1] pool_size = pooling_size[layer+1] conv_layers.append ( LeNetConvPoolLayer( rng, input=conv_layers[layer].output, image_shape=(batch_size, nkerns[layer], next_in_1, next_in_2), filter_shape=(nkerns[layer+1], nkerns[layer], filt_size, filt_size), poolsize=(pool_size, pool_size), activation = cnn_activations[layer+1], verbose = verbose ) ) next_in_1 = ( next_in_1 - filt_size + 1 ) / pool_size next_in_2 = ( next_in_2 - filt_size + 1 ) / pool_size weights.append ( conv_layers[-1].filter_img ) activity.append( conv_layers[-1].output ) # Assemble fully connected laters if nkerns == []: fully_connected_input = first_layer_input else: fully_connected_input = conv_layers[-1].output.flatten(2) if len(dropout_rates) > 2 : layer_sizes =[] layer_sizes.append( nkerns[-1] * next_in_1 * next_in_2 ) for i in xrange(len(dropout_rates)-1): layer_sizes.append ( num_nodes[i] ) layer_sizes.append ( outs ) elif len(dropout_rates) == 1: layer_size = [ nkerns[-1] * next_in_1 * next_in_2, outs] else : layer_sizes = [ nkerns[-1] * next_in_1 * next_in_2, num_nodes[0] , outs] assert len(layer_sizes) - 1 == len(dropout_rates) # Just checking. """ Dropouts implemented from paper: Srivastava, Nitish, et al. "Dropout: A simple way to prevent neural networks from overfitting." The Journal of Machine Learning Research 15.1 (2014): 1929-1958. """ MLPlayers = MLP( rng=rng, input=fully_connected_input, layer_sizes=layer_sizes, dropout_rates=dropout_rates, activations=mlp_activations, use_bias = use_bias, svm_flag = svm_flag, verbose = verbose) # Build the expresson for the categorical cross entropy function. if svm_flag is False: cost = MLPlayers.negative_log_likelihood( y ) dropout_cost = MLPlayers.dropout_negative_log_likelihood( y ) else : cost = MLPlayers.negative_log_likelihood( y1 ) dropout_cost = MLPlayers.dropout_negative_log_likelihood( y1 ) # create theano functions for evaluating the graphs test_model = theano.function( inputs=[index], outputs=MLPlayers.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size]}) validate_model = theano.function( inputs=[index], outputs=MLPlayers.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size]}) prediction = theano.function( inputs = [index], outputs = MLPlayers.predicts, givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size]}) nll = theano.function( inputs = [index], outputs = MLPlayers.probabilities, givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size]}) # function to return activations of each image activities = theano.function ( inputs = [index], outputs = activity, givens = { x: train_set_x[index * batch_size: (index + 1) * batch_size] }) # Compute cost and gradients of the model wrt parameter params = [] for layer in conv_layers: params = params + layer.params params = params + MLPlayers.params output = dropout_cost + l1_reg * MLPlayers.dropout_L1 + l2_reg * MLPlayers.dropout_L2 if dropout else cost + l1_reg * MLPlayers.L1 + l2_reg * MLPlayers.L2 gradients = [] for param in params: gradient = T.grad( output ,param) gradients.append ( gradient ) # TO DO: Try implementing Adadelta also. # Compute momentum for the current epoch epoch = T.scalar() mom = ifelse(epoch <= mom_epoch_interval, mom_start*(1.0 - epoch/mom_epoch_interval) + mom_end*(epoch/mom_epoch_interval), mom_end) # learning rate eta = theano.shared(numpy.asarray(initial_learning_rate,dtype=theano.config.floatX)) # accumulate gradients for adagrad grad_acc = [] for param in params: eps = numpy.zeros_like(param.get_value(borrow=True), dtype=theano.config.floatX) grad_acc.append(theano.shared(eps, borrow=True)) # accumulate velocities for momentum velocities = [] for param in params: velocity = theano.shared(numpy.zeros(param.get_value(borrow=True).shape,dtype=theano.config.floatX)) velocities.append(velocity) # create updates for each combination of stuff updates = OrderedDict() print_flag = False for velocity, gradient, acc , param in zip(velocities, gradients, grad_acc, params): if ada_grad is True: """ Adagrad implemented from paper: John Duchi, Elad Hazan, and Yoram Singer. 2011. Adaptive subgradient methods for online learning and stochastic optimization. JMLR """ current_acc = acc + T.sqr(gradient) # Accumulates Gradient updates[acc] = current_acc # updates accumulation at timestamp elif rms_prop is True: """ Tieleman, T. and Hinton, G. (2012): Neural Networks for Machine Learning, Lecture 6.5 - rmsprop. Coursera. http://www.youtube.com/watch?v=O3sxAc4hxZU (formula @5:20)""" current_acc = rms_rho * acc + (1 - rms_rho) * T.sqr(gradient) updates[acc] = current_acc else: current_acc = 1 fudge_factor = 0 if mom_type == 0: # no momentum updates[velocity] = -(eta / T.sqrt(current_acc + fudge_factor)) * gradient #updates[velocity] = -1*eta*gradient # perform adagrad velocity update # this will be just added to parameters. elif mom_type == 1: # if polyak momentum """ Momentum implemented from paper: Polyak, Boris Teodorovich. "Some methods of speeding up the convergence of iteration methods." USSR Computational Mathematics and Mathematical Physics 4.5 (1964): 1-17. Adapted from Sutskever, Ilya, Hinton et al. "On the importance of initialization and momentum in deep learning." Proceedings of the 30th international conference on machine learning (ICML-13). 2013. equation (1) and equation (2)""" updates[velocity] = mom * velocity - (1.-mom) * ( eta / T.sqrt(current_acc+ fudge_factor)) * gradient elif mom_type == 2: # Nestrov accelerated gradient beta stage... """Nesterov, Yurii. "A method of solving a convex programming problem with convergence rate O (1/k2)." Soviet Mathematics Doklady. Vol. 27. No. 2. 1983. Adapted from https://blogs.princeton.edu/imabandit/2013/04/01/acceleratedgradientdescent/ Instead of using past params we use the current params as described in this link https://github.com/lisa-lab/pylearn2/pull/136#issuecomment-10381617,""" updates[velocity] = mom * velocity - (1.-mom) * ( eta / T.sqrt(current_acc + fudge_factor)) * gradient updates[param] = mom * updates[velocity] else: if print_flag is False: print_flag = True print "!! Unrecognized mometum type, switching to no momentum." updates[velocity] = -( eta / T.sqrt(current_acc+ fudge_factor) ) * gradient if mom_type != 2: stepped_param = param + updates[velocity] else: stepped_param = param + updates[velocity] + updates[param] if param.get_value(borrow=True).ndim == 2 and column_norm is True: """ constrain the norms of the COLUMNs of the weight, according to https://github.com/BVLC/caffe/issues/109 """ col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0)) desired_norms = T.clip(col_norms, 0, T.sqrt(squared_filter_length_limit)) scale = desired_norms / (1e-7 + col_norms) updates[param] = stepped_param * scale else: updates[param] = stepped_param if svm_flag is True: train_model = theano.function(inputs= [index, epoch], outputs=output, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y1: train_set_y1[index * batch_size:(index + 1) * batch_size]}, on_unused_input='ignore' ) else: train_model = theano.function(inputs= [index, epoch], outputs=output, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size]}, on_unused_input='ignore' ) decay_learning_rate = theano.function( inputs=[], outputs=eta, # Just updates the learning rates. updates={eta: eta * learning_rate_decay} ) momentum_value = theano.function ( inputs =[epoch], outputs = mom, ) end_time = time.clock() # setting up visualization stuff... shuffle_batch_ind = numpy.arange(batch_size) numpy.random.shuffle(shuffle_batch_ind) visualize_ind = shuffle_batch_ind[0:n_visual_images] #visualize_ind = range(n_visual_images) main_img_visual = True # create all directories required for saving results and data. if visualize_flag is True: if not os.path.exists('../visuals'): os.makedirs('../visuals') if not os.path.exists('../visuals/activities'): os.makedirs('../visuals/activities') for i in xrange(len(nkerns)): os.makedirs('../visuals/activities/layer_'+str(i)) if not os.path.exists('../visuals/filters'): os.makedirs('../visuals/filters') for i in xrange(len(nkerns)): os.makedirs('../visuals/filters/layer_'+str(i)) if not os.path.exists('../visuals/images'): os.makedirs('../visuals/images') if not os.path.exists('../results/'): os.makedirs ('../results') print "... -> building complete, took " + str((end_time - start_time)) + " seconds" ############### # TRAIN MODEL # ############### #pdb.set_trace() print "... training" start_time = time.clock() patience = numpy.inf # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant this_validation_loss = [] best_validation_loss = numpy.inf best_iter = 0 epoch_counter = 0 early_termination = False cost_saved = [] best_params = None iteration= 0 while (epoch_counter < n_epochs) and (not early_termination): epoch_counter = epoch_counter + 1 for batch in xrange (batches2train): if verbose is True: print "... -> Epoch: " + str(epoch_counter) + " Batch: " + str(batch+1) + " out of " + str(batches2train) + " batches" if multi_load is True: iteration= (epoch_counter - 1) * n_train_batches * batches2train + batch # Load data for this batch if verbose is True: print "... -> loading data for new batch" if data_params["type"] == 'mat': train_data_x, train_data_y, train_data_y1 = load_data_mat(dataset, batch = batch + 1 , type_set = 'train') elif data_params["type"] == 'skdata': if dataset == 'caltech101': train_data_x, train_data_y = load_skdata_caltech101(batch_size = load_batches, batch = batch + 1 , type_set = 'train', rand_perm = rand_perm, height = height, width = width ) # Do not use svm_flag for caltech 101 train_set_x.set_value(train_data_x ,borrow = True) train_set_y.set_value(train_data_y ,borrow = True) for minibatch_index in xrange(n_train_batches): if verbose is True: print "... -> Mini Batch: " + str(minibatch_index + 1) + " out of " + str(n_train_batches) cost_ij = train_model( minibatch_index, epoch_counter) cost_saved = cost_saved +[cost_ij] else: iteration= (epoch_counter - 1) * n_train_batches + batch cost_ij = train_model(batch, epoch_counter) cost_saved = cost_saved +[cost_ij] if epoch_counter % validate_after_epochs is 0: # Load Validation Dataset here. validation_losses = 0. if multi_load is True: # Load data for this batch for batch in xrange ( batches2test ): if data_params["type"] == 'mat': valid_data_x, valid_data_y, valid_data_y1 = load_data_mat(dataset, batch = batch + 1 , type_set = 'valid') elif data_params["type"] == 'skdata': if dataset == 'caltech101': valid_data_x, valid_data_y = load_skdata_caltech101(batch_size = load_batches, batch = batch + 1 , type_set = 'valid' , rand_perm = rand_perm, height = height, width = width ) # Do not use svm_flag for caltech 101 valid_set_x.set_value(valid_data_x,borrow = True) valid_set_y.set_value(valid_data_y,borrow = True) validation_losses = validation_losses + numpy.sum([[validate_model(i) for i in xrange(n_valid_batches)]]) this_validation_loss = this_validation_loss + [validation_losses] if verbose is True: if this_validation_loss[-1] < best_validation_loss : print "... -> epoch " + str(epoch_counter) + ", cost: " + str(numpy.mean(cost_saved[-1*n_train_batches:])) +", validation accuracy :" + str(float( batch_size * n_valid_batches * batches2validate - this_validation_loss[-1])*100/(batch_size*n_valid_batches*batches2validate)) + "%, learning_rate = " + str(eta.get_value(borrow=True))+ ", momentum = " +str(momentum_value(epoch_counter)) + " -> best thus far " else : print "... -> epoch " + str(epoch_counter) + ", cost: " + str(numpy.mean(cost_saved[-1*n_train_batches:])) +", validation accuracy :" + str(float( batch_size * n_valid_batches * batches2validate - this_validation_loss[-1])*100/(batch_size*n_valid_batches*batches2validate)) + "%, learning_rate = " + str(eta.get_value(borrow=True)) + ", momentum = " +str(momentum_value(epoch_counter)) else: if this_validation_loss[-1] < best_validation_loss : print "... -> epoch " + str(epoch_counter) + ", cost: " + str(numpy.mean(cost_saved[-1*n_train_batches:])) +", validation accuracy :" + str(float( batch_size * n_valid_batches * batches2validate - this_validation_loss[-1])*100/(batch_size*n_valid_batches*batches2validate)) + "% -> best thus far " else : print "... -> epoch " + str(epoch_counter) + ", cost: " + str(numpy.mean(cost_saved[-1*n_train_batches:])) +", validation accuracy :" + str(float( batch_size * n_valid_batches * batches2validate - this_validation_loss[-1])*100/(batch_size*n_valid_batches*batches2validate)) + "%" else: validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] this_validation_loss = this_validation_loss + [numpy.sum(validation_losses)] if verbose is True: if this_validation_loss[-1] < best_validation_loss : print "... -> epoch " + str(epoch_counter) + ", cost: " + str(cost_saved[-1]) +", validation accuracy :" + str(float(batch_size*n_valid_batches - this_validation_loss[-1])*100/(batch_size*n_valid_batches)) + "%, learning_rate = " + str(eta.get_value(borrow=True)) + ", momentum = " +str(momentum_value(epoch_counter)) + " -> best thus far " else: print "... -> epoch " + str(epoch_counter) + ", cost: " + str(cost_saved[-1]) +", validation accuracy :" + str(float(batch_size*n_valid_batches - this_validation_loss[-1])*100/(batch_size*n_valid_batches)) + "%, learning_rate = " + str(eta.get_value(borrow=True)) + ", momentum = " +str(momentum_value(epoch_counter)) else: if this_validation_loss[-1] < best_validation_loss : print "... -> epoch " + str(epoch_counter) + ", cost: " + str(cost_saved[-1]) +", validation accuracy :" + str(float(batch_size*n_valid_batches - this_validation_loss[-1])*100/(batch_size*n_valid_batches)) + "% -> best thus far " else: print "... -> epoch " + str(epoch_counter) + ", cost: " + str(cost_saved[-1]) +", validation accuracy :" + str(float(batch_size*n_valid_batches - this_validation_loss[-1])*100/(batch_size*n_valid_batches)) + "% " #improve patience if loss improvement is good enough if this_validation_loss[-1] < best_validation_loss * \ improvement_threshold: patience = max(patience, iteration* patience_increase) best_iter = iteration best_validation_loss = min(best_validation_loss, this_validation_loss[-1]) new_leanring_rate = decay_learning_rate() if visualize_flag is True: if epoch_counter % visualize_after_epochs is 0: # saving down images. if main_img_visual is False: for i in xrange(n_visual_images): curr_img = numpy.asarray(numpy.reshape(train_set_x.get_value( borrow = True )[visualize_ind[i]],[height, width, channels] ) * 255., dtype='uint8' ) if verbose is True: cv2.imshow("Image Number " +str(i) + "_label_" + str(train_set_y.eval()[visualize_ind[i]]), curr_img) cv2.imwrite("../visuals/images/image_" + str(i)+ "_label_" + str(train_set_y.eval()[visualize_ind[i]]) + ".jpg", curr_img ) main_img_visual = True # visualizing activities. activity = activities(0) for m in xrange(len(nkerns)): #For each layer loc_ac = '../visuals/activities/layer_' + str(m) + "/epoch_" + str(epoch_counter) +"/" if not os.path.exists(loc_ac): os.makedirs(loc_ac) current_activity = activity[m] for i in xrange(n_visual_images): # for each randomly chosen image .. visualize its activity visualize(current_activity[visualize_ind[i]], loc = loc_ac, filename = 'activity_' + str(i) + "_label_" + str(train_set_y.eval()[visualize_ind[i]]) +'.jpg' , show_img = display_flag) # visualizing the filters. for m in xrange(len(nkerns)): if m == 0: # first layer outpus. if channels == 3: # if the image is color, then first layer looks at color pictures and I can visualize the filters also as color. curr_image = weights[m].eval() if not os.path.exists('../visuals/filters/layer_'+str(m)+'/epoch_'+str(epoch_counter)): os.makedirs('../visuals/filters/layer_'+str(m)+'/epoch_'+str(epoch_counter)) visualize_color_filters(curr_image, loc = '../visuals/filters/layer_' + str(m) + '/' + 'epoch_' + str(epoch_counter) + '/' , filename = 'kernel_0.jpg' , show_img = display_flag) else: # visualize them as grayscale images. for i in xrange(weights[m].shape.eval()[1]): curr_image = weights[m].eval() [:,i,:,:] if not os.path.exists('../visuals/filters/layer_'+str(m)+'/epoch_'+str(epoch_counter)): os.makedirs('../visuals/filters/layer_'+str(m)+'/epoch_'+str(epoch_counter)) visualize(curr_image, loc = '../visuals/filters/layer_' + str(m) + '/' + 'epoch_' + str(epoch_counter) + '/' , filename = 'kernel_' + str(i) + '.jpg' , show_img = display_flag) else: for i in xrange(nkerns[m-1]): curr_image = weights[m].eval()[:,i,:,:] if not os.path.exists('../visuals/filters/layer_'+str(m)+'/epoch_'+str(epoch_counter)): os.makedirs('../visuals/filters/layer_'+str(m)+'/epoch_'+str(epoch_counter)) visualize(curr_image, loc = '../visuals/filters/layer_' + str(m) + '/' + 'epoch_' + str(epoch_counter) + '/' , filename = 'kernel_' + str(i) + '.jpg' , show_img = display_flag) if patience <= iteration: early_termination = True break save_network( 'network.pkl.gz', params, arch_params, data_params ) end_time = time.clock() print "... training complete, took " + str((end_time - start_time)/ 60.) +" minutes" ############### # TEST MODEL # ############### start_time = time.clock() print "... testing" wrong = 0 predictions = [] class_prob = [] labels = [] if multi_load is False: labels = test_set_y.eval().tolist() for mini_batch in xrange(batches2test): #print ".. Testing batch " + str(mini_batch) wrong = wrong + int(test_model(mini_batch)) predictions = predictions + prediction(mini_batch).tolist() class_prob = class_prob + nll(mini_batch).tolist() print "... -> Total test accuracy : " + str(float((batch_size*n_test_batches)-wrong )*100/(batch_size*n_test_batches)) + " % out of " + str(batch_size*n_test_batches) + " samples." else: for batch in xrange(batches2test): print ".. Testing batch " + str(batch) # Load data for this batch if data_params["type"] == 'mat': test_data_x, test_data_y, test_data_y1 = load_data_mat(dataset, batch = batch + 1 , type_set = 'test') elif data_params["type"] == 'skdata': if dataset == 'caltech101': test_data_x, test_data_y = load_skdata_caltech101(batch_size = load_batches, batch = batch + 1 , type_set = 'test', rand_perm = rand_perm, height = height, width = width ) test_set_x.set_value(test_data_x,borrow = True) test_set_y.set_value(test_data_y,borrow = True) labels = labels + test_set_y.eval().tolist() for mini_batch in xrange(n_test_batches): wrong = wrong + int(test_model(mini_batch)) predictions = predictions + prediction(mini_batch).tolist() class_prob = class_prob + nll(mini_batch).tolist() print "... -> Total test accuracy : " + str(float((batch_size*n_test_batches*batches2test)-wrong )*100/(batch_size*n_test_batches*batches2test)) + " % out of " + str(batch_size*n_test_batches*batches2test) + " samples." end_time = time.clock() correct = 0 confusion = numpy.zeros((outs,outs), dtype = int) for index in xrange(len(predictions)): if labels[index] is predictions[index]: correct = correct + 1 confusion[int(predictions[index]),int(labels[index])] = confusion[int(predictions[index]),int(labels[index])] + 1 # Save down data f = open(results_file_name, 'w') for i in xrange(len(predictions)): f.write(str(i)) f.write("\t") f.write(str(labels[i])) f.write("\t") f.write(str(predictions[i])) f.write("\t") for j in xrange(outs): f.write(str(class_prob[i][j])) f.write("\t") f.write('\n') f = open(error_file_name,'w') for i in xrange(len(this_validation_loss)): f.write(str(this_validation_loss[i])) f.write("\n") f.close() f = open(cost_file_name,'w') for i in xrange(len(cost_saved)): f.write(str(cost_saved[i])) f.write("\n") f.close() f = open(confusion_file_name, 'w') f.write(confusion) f.close() save_network( network_save_name, params, arch_params, data_params ) end_time = time.clock() print "Testing complete, took " + str((end_time - start_time)/ 60.) + " minutes" print "Confusion Matrix with accuracy : " + str(float(correct)/len(predictions)*100) print confusion print "Done" pdb.set_trace()
def run ( filename_params , verbose , # True prints in a lot of intermetediate steps, False keeps it to minimum. data_params , visual_params ): ##################### # Unpack Variables # ##################### visualize_flag = visual_params ["visualize_flag" ] n_visual_images = visual_params ["n_visual_images" ] display_flag = visual_params ["display_flag" ] results_file_name = filename_params [ "results_file_name" ] # Files that will be saved down on completion Can be used by the parse.m file confusion_file_name = filename_params [ "confusion_file_name" ] load_file_name = filename_params [ "load_file_name" ] dataset = data_params [ "loc" ] height = data_params [ "height" ] width = data_params [ "width" ] batch_size = data_params [ "batch_size" ] load_batches = data_params [ "load_batches" ] * batch_size batches2test = data_params [ "batches2test" ] channels = data_params [ "channels" ] ################# # Load Network # ################# params, arch_params = load_network(load_file_name) squared_filter_length_limit = arch_params [ "squared_filter_length_limit" ] n_epochs = arch_params [ "n_epochs" ] validate_after_epochs = arch_params [ "validate_after_epochs" ] mlp_activations = arch_params [ "mlp_activations" ] cnn_activations = arch_params [ "cnn_activations" ] dropout = arch_params [ "dropout" ] column_norm = arch_params [ "column_norm" ] dropout_rates = arch_params [ "dropout_rates" ] nkerns = arch_params [ "nkerns" ] outs = arch_params [ "outs" ] filter_size = arch_params [ "filter_size" ] pooling_size = arch_params [ "pooling_size" ] num_nodes = arch_params [ "num_nodes" ] use_bias = arch_params [ "use_bias" ] random_seed = arch_params [ "random_seed" ] svm_flag = arch_params [ "svm_flag" ] rng = numpy.random.RandomState(random_seed) ################# # Data Loading # ################# # To DO: Make this a class in the loaders.py so that this doesn't have to be copied into all the future codes. print "... loading data" # load matlab files as dataset. if data_params["type"] == 'mat': train_data_x, train_data_y, train_data_y1 = load_data_mat(dataset, batch = 1 , type_set = 'train') test_data_x, test_data_y, valid_data_y1 = load_data_mat(dataset, batch = 1 , type_set = 'test') # Load dataset for first epoch. valid_data_x, valid_data_y, test_data_y1 = load_data_mat(dataset, batch = 1 , type_set = 'valid') # Load dataset for first epoch. test_set_x = theano.shared(numpy.asarray(test_data_x, dtype=theano.config.floatX), borrow=True) test_set_y = theano.shared(numpy.asarray(test_data_y, dtype='int32'), borrow=True) test_set_y1 = theano.shared(numpy.asarray(test_data_y1, dtype=theano.config.floatX), borrow=True) # compute number of minibatches for training, validation and testing n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size multi_load = True # load pkl data as is shown in theano tutorials elif data_params["type"] == 'pkl': data = load_data_pkl(dataset) test_set_x, test_set_y, test_set_y1 = data[2] # compute number of minibatches for training, validation and testing n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size n_test_images = test_set_x.get_value(borrow=True).shape[0] n_test_batches_all = n_test_images / batch_size if (n_test_batches_all < batches2test): # You can't have so many batches. print "... !! Dataset doens't have so many batches. " raise AssertionError() multi_load = False # load skdata ( its a good library that has a lot of datasets) elif data_params["type"] == 'skdata': if (dataset == 'mnist' or dataset == 'mnist_noise1' or dataset == 'mnist_noise2' or dataset == 'mnist_noise3' or dataset == 'mnist_noise4' or dataset == 'mnist_noise5' or dataset == 'mnist_noise6' or dataset == 'mnist_bg_images' or dataset == 'mnist_bg_rand' or dataset == 'mnist_rotated' or dataset == 'mnist_rotated_bg') : print "... importing " + dataset + " from skdata" func = globals()['load_skdata_' + dataset] data = func() test_set_x, test_set_y, test_set_y1 = data[2] # compute number of minibatches for training, validation and testing n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size n_test_images = test_set_x.get_value(borrow=True).shape[0] n_test_batches_all = n_test_images / batch_size if (n_test_batches_all < batches2test): # You can't have so many batches. print "... !! Dataset doens't have so many batches. " raise AssertionError() multi_load = False elif dataset == 'cifar10': print "... importing cifar 10 from skdata" data = load_skdata_cifar10() test_set_x, test_set_y, test_set_y1 = data[2] # compute number of minibatches for training, validation and testing n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size multi_load = False elif dataset == 'caltech101': print "... importing caltech 101 from skdata" # shuffle the data total_images_in_dataset = 9144 rand_perm = numpy.random.permutation(total_images_in_dataset) # create a constant shuffle, so that data can be loaded in batchmode with the same random shuffle n_test_images = total_images_in_dataset / 3 n_test_batches_all = n_test_images / batch_size if (n_test_batches_all < batches2test): # You can't have so many batches. print "... !! Dataset doens't have so many batches. " raise AssertionError() test_data_x, test_data_y = load_skdata_caltech101(batch_size = load_batches, rand_perm = rand_perm, batch = 1 , type_set = 'test' , height = height, width = width) # Load dataset for first epoch. test_set_x = theano.shared(test_data_x, borrow=True) test_set_y = theano.shared(test_data_y, borrow=True) # compute number of minibatches for training, validation and testing n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size multi_load = True ###################### # BUILD NETWORK # ###################### print '... building the network' start_time = time.clock() # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of [int] if svm_flag is True: y1 = T.matrix('y1') # [-1 , 1] labels in case of SVM first_layer_input = x.reshape((batch_size, channels, height, width)) # Create first convolutional - pooling layers activity = [] # to record Cnn activities weights = [] conv_layers=[] filt_size = filter_size[0] pool_size = pooling_size[0] count = 0 if not nkerns == []: conv_layers.append ( LeNetConvPoolLayer( rng, input = first_layer_input, image_shape=(batch_size, channels , height, width), filter_shape=(nkerns[0], channels , filt_size, filt_size), poolsize=(pool_size, pool_size), activation = cnn_activations[0], W = params[count], b = params[count+1], verbose = verbose ) ) count = count + 2 activity.append ( conv_layers[-1].output ) weights.append ( conv_layers[-1].filter_img) # Create the rest of the convolutional - pooling layers in a loop next_in_1 = ( height - filt_size + 1 ) / pool_size next_in_2 = ( width - filt_size + 1 ) / pool_size for layer in xrange(len(nkerns)-1): filt_size = filter_size[layer+1] pool_size = pooling_size[layer+1] conv_layers.append ( LeNetConvPoolLayer( rng, input=conv_layers[layer].output, image_shape=(batch_size, nkerns[layer], next_in_1, next_in_2), filter_shape=(nkerns[layer+1], nkerns[layer], filt_size, filt_size), poolsize=(pool_size, pool_size), activation = cnn_activations[layer+1], W = params[count], b = params[count+1], verbose = verbose ) ) next_in_1 = ( next_in_1 - filt_size + 1 ) / pool_size next_in_2 = ( next_in_2 - filt_size + 1 ) / pool_size weights.append ( conv_layers[-1].filter_img ) activity.append( conv_layers[-1].output ) count = count + 2 # Assemble fully connected laters if nkerns == []: fully_connected_input = first_layer_input else: fully_connected_input = conv_layers[-1].output.flatten(2) if len(dropout_rates) > 2 : layer_sizes =[] layer_sizes.append( nkerns[-1] * next_in_1 * next_in_2 ) for i in xrange(len(dropout_rates)-1): layer_sizes.append ( num_nodes[i] ) layer_sizes.append ( outs ) elif len(dropout_rates) == 1: layer_sizes = [ nkerns[-1] * next_in_1 * next_in_2, outs] else : layer_sizes = [ nkerns[-1] * next_in_1 * next_in_2, num_nodes[0] , outs] assert len(layer_sizes) - 1 == len(dropout_rates) # Just checking. """ Dropouts implemented from paper: Srivastava, Nitish, et al. "Dropout: A simple way to prevent neural networks from overfitting." The Journal of Machine Learning Research 15.1 (2014): 1929-1958. """ MLPlayers = MLP( rng=rng, input=fully_connected_input, layer_sizes=layer_sizes, dropout_rates=dropout_rates, activations=mlp_activations, use_bias = use_bias, svm_flag = svm_flag, params = params[count:], verbose = verbose) # Build the expresson for the categorical cross entropy function. if svm_flag is False: cost = MLPlayers.negative_log_likelihood( y ) dropout_cost = MLPlayers.dropout_negative_log_likelihood( y ) else : cost = MLPlayers.negative_log_likelihood( y1 ) dropout_cost = MLPlayers.dropout_negative_log_likelihood( y1 ) # create theano functions for evaluating the graphs test_model = theano.function( inputs=[index], outputs=MLPlayers.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size]}) prediction = theano.function( inputs = [index], outputs = MLPlayers.predicts, givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size]}) nll = theano.function( inputs = [index], outputs = MLPlayers.probabilities, givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size]}) # function to return activations of each image activities = theano.function ( inputs = [index], outputs = activity, givens = { x: test_set_x[index * batch_size: (index + 1) * batch_size] }) ############### # TEST MODEL # ############### start_time = time.clock() print "... testing" wrong = 0 predictions = [] class_prob = [] labels = [] if multi_load is False: labels = test_set_y.eval().tolist() for mini_batch in xrange(batches2test): #print ".. Testing batch " + str(mini_batch) wrong = wrong + int(test_model(mini_batch)) predictions = predictions + prediction(mini_batch).tolist() class_prob = class_prob + nll(mini_batch).tolist() print "... -> Total test accuracy : " + str(float((batch_size*n_test_batches)-wrong )*100/(batch_size*n_test_batches)) + " % out of " + str(batch_size*n_test_batches) + " samples." else: for batch in xrange(batches2test): print ".. Testing batch " + str(batch) # Load data for this batch if data_params["type"] == 'mat': test_data_x, test_data_y, test_data_y1 = load_data_mat(dataset, batch = batch + 1 , type_set = 'test') elif data_params["type"] == 'skdata': if dataset == 'caltech101': test_data_x, test_data_y = load_skdata_caltech101(batch_size = load_batches, batch = batch + 1 , type_set = 'test', rand_perm = rand_perm, height = height, width = width ) test_set_x.set_value(test_data_x,borrow = True) test_set_y.set_value(test_data_y,borrow = True) labels = labels + test_set_y.eval().tolist() for mini_batch in xrange(n_test_batches): wrong = wrong + int(test_model(mini_batch)) predictions = predictions + prediction(mini_batch).tolist() class_prob = class_prob + nll(mini_batch).tolist() print "... -> Total test accuracy : " + str(float((batch_size*n_test_batches*batches2test)-wrong )*100/(batch_size*n_test_batches*batches2test)) + " % out of " + str(batch_size*n_test_batches*batches2test) + " samples." end_time = time.clock() correct = 0 confusion = numpy.zeros((outs,outs), dtype = int) for index in xrange(len(predictions)): if labels[index] is predictions[index]: correct = correct + 1 confusion[int(predictions[index]),int(labels[index])] = confusion[int(predictions[index]),int(labels[index])] + 1 # Save down data f = open(results_file_name, 'w') for i in xrange(len(predictions)): f.write(str(i)) f.write("\t") f.write(str(labels[i])) f.write("\t") f.write(str(predictions[i])) f.write("\t") for j in xrange(outs): f.write(str(class_prob[i][j])) f.write("\t") f.write('\n') f = open(confusion_file_name, 'w') f.write(confusion) f.close() end_time = time.clock() print "Testing complete, took " + str((end_time - start_time)/ 60.) + " minutes" print "Confusion Matrix with accuracy : " + str(float(correct)/len(predictions)*100) print confusion print "Done" pdb.set_trace()