import theano
import theano.tensor as T
import numpy as np
import sys
sys.path.insert(0, '../data_loader/')
import load

# Fetch the train/test splits through the project loader; the test targets
# are reduced to one label index per row.
x_train, t_train, x_test, t_test = load.cifar10(dtype=theano.config.floatX)
labels_test = np.argmax(t_test, axis=1)

# Symbolic placeholders for the inputs and the targets.
x = T.matrix()
t = T.matrix()


# Model: logistic regression
def floatX(x):
    """Return *x* as an ndarray using Theano's configured float dtype."""
    target_dtype = theano.config.floatX
    return np.asarray(x, dtype=target_dtype)


def init_weights(shape):
    """Create a shared variable of the given shape, drawn from N(0, 0.1**2)."""
    initial_values = np.random.randn(*shape) * 0.1
    return theano.shared(floatX(initial_values))


def model(x, w):
    """Return the softmax of the linear map ``x . w``."""
    scores = T.dot(x, w)
    return T.nnet.softmax(scores)


w = init_weights((32 * 32, 10))
p_y_given_x = model(x, w)
y = T.argmax(p_y_given_x, axis=1)
# Mean categorical cross-entropy between the predictions and the targets.
cost = T.mean(T.nnet.categorical_crossentropy(p_y_given_x, t))
import theano import theano.tensor as T import numpy as np import matplotlib.pyplot as plt import sklearn.datasets as ds import load # load data x_train, t_train, x_test, t_test = load.cifar10(dtype=theano.config.floatX) labels_test = np.argmax( t_test, axis=1 ) # Gives the index of the maximum value, e.g. [0, 2, 1, 0] -> returns 1 since the value 2 at index 1 is the largest # visualize data # Define symbolic variables in Theano x = T.matrix() t = T.matrix() # define model: neural network def floatX(x): return np.asarray(x, dtype=theano.config.floatX) def init_weights(shape): return theano.shared(floatX(np.random.randn(*shape) * 0.1)) # Define the back-propagation algorithm (gradient descent) to update the weights def sgd(cost, params, learning_rate):
import theano import theano.tensor as T import numpy as np import layers from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams import load from theano.tensor.signal.pool import pool_2d from theano.tensor.nnet import conv2d x_train, t_train, x_test, t_test = load.cifar10(dtype=theano.config.floatX, grayscale=False) labels = np.argmax(t_test, axis=1) x_train = x_train.reshape((x_train.shape[0], 3, 32, 32)) x_test = x_test.reshape((x_test.shape[0], 3, 32, 32)) # define symbolic Theano variables x = T.tensor4() #ireg = T.scalar() #selector = T.scalar() #prepare weight #BC architecture is 2X128C3 - MP2 - 2x256C3 - MP2 - 2x512C3 - MP2 - 2x1024FC - 10 params = layers.loadParams('dropout_removed_88p12.save', 6, 2) def feedForward(x, params): l = 0 current_params = params[l] c1 = conv2d(x, current_params[0]) + current_params[1].dimshuffle( 'x', 0, 'x', 'x')
# Activation / loss shortcuts used further down.
tanh = activations.Tanh()
softplus = activations.Softplus()
bce = T.nnet.binary_crossentropy

###################
# SET INITIALIZER #
###################
# Weight initializers (gifn / difn), plus gain and bias initializers.
gifn = inits.Normal(scale=0.02)
difn = inits.Normal(scale=0.02)
gain_ifn = inits.Normal(loc=1., scale=0.02)
bias_ifn = inits.Constant(c=0.)

#################
# LOAD DATA SET #
#################
tr_data, te_data, tr_stream, val_stream, te_stream = cifar10(
    ntrain=ntrain,
    window_size=(npx, npx),
)

###################
# GET DATA STATIC #
###################
# Pull the first 10000 entries of the training set once and transform them.
tr_handle = tr_data.open()
(vaX,) = tr_data.get_data(tr_handle, slice(0, 10000))
vaX = transform(vaX)

#####################
# INITIALIZE PARAMS #
#####################
nz = 100  # NUM OF HIDDENS
ndf = 128  # NUM OF MINIMAL FILTERS
ngf = ndf  # generator uses the same minimal filter count
# FOR GENERATOR
# LAYER 1 (LINEAR)
save_model(tensor_params_list=generator_params[0]+generator_params[1]+energy_params[0]+energy_params[1], save_to=save_as) if __name__=="__main__": model_config_dict = OrderedDict() model_config_dict['batch_size'] = 128 model_config_dict['num_display'] = 16*16 model_config_dict['hidden_distribution'] = 1. model_config_dict['epochs'] = 200 ################# # LOAD DATA SET # ################# tr_data, te_data, data_stream, te_stream = cifar10(batch_size=model_config_dict['batch_size']) expert_size_list = [1024] hidden_size_list = [100] num_filters_list = [128] lr_list = [1e-4] lambda_eng_list = [1e-5] lambda_gen_list = [1e-5] for lr in lr_list: for num_filters in num_filters_list: for hidden_size in hidden_size_list: for expert_size in expert_size_list: for lambda_eng in lambda_eng_list: for lambda_gen in lambda_gen_list: model_config_dict['hidden_size'] = hidden_size
# Activation / loss shortcuts used further down.
tanh = activations.Tanh()
softplus = activations.Softplus()
bce = T.nnet.binary_crossentropy

###################
# SET INITIALIZER #
###################
# Weight initializers (gifn / difn), plus gain and bias initializers.
gifn = inits.Normal(scale=0.02)
difn = inits.Normal(scale=0.02)
gain_ifn = inits.Normal(loc=1., scale=0.02)
bias_ifn = inits.Constant(c=0.)

#################
# LOAD DATA SET #
#################
tr_data, te_data, tr_stream, val_stream, te_stream = cifar10(
    window_size=(npx, npx),
)

###################
# GET DATA STATIC #
###################
# Pull the first 10000 entries of the training set once and transform them.
tr_handle = tr_data.open()
(vaX,) = tr_data.get_data(tr_handle, slice(0, 10000))
vaX = transform(vaX)

#####################
# INITIALIZE PARAMS #
#####################
nz = 100  # NUM OF HIDDENS
ndf = 128  # NUM OF MINIMAL FILTERS
ngf = ndf  # generator uses the same minimal filter count
# FOR GENERATOR
# LAYER 1 (LINEAR)
def main(train=False,
         train_lim=False,
         model='cifar',
         preproc=True,
         num_epochs=200,
         startepoch=199,
         bits=(16, ),
         precision=((6, ), ),
         singleprec=False):
    """Evaluate the stored network at one hard-coded fixed-point precision.

    Builds the float network and a limited-precision twin, loads the weights
    from ``model9.npz`` into both, quantizes the twin's parameters and the
    first 100 validation samples with ``bit = 7`` / ``prec = 3``, and prints
    the resulting loss and accuracy.

    Args:
        train, train_lim, num_epochs, startepoch, bits, precision: accepted
            for interface compatibility; this function does not read them.
        model: network type; only ``'cifar'`` is recognized.
        preproc: load the data with ``load.cifar10pre`` when true, otherwise
            with ``load.cifar10``.
        singleprec: forwarded unchanged to ``get_output_lim``.

    Returns:
        None. For an unrecognized ``model`` a message is printed and the
        function returns early.
    """
    # Load the dataset
    print("Loading data...")
    loader = load.cifar10pre if preproc else load.cifar10
    X_trainnoaug, y_trainnoaug, X_test, y_test = loader(
        dtype=theano.config.floatX, grayscale=False)

    # The last 10000 training samples are held out as the validation set.
    X_trainnoaug, X_val = X_trainnoaug[:-10000], X_trainnoaug[-10000:]
    y_trainnoaug, y_val = y_trainnoaug[:-10000], y_trainnoaug[-10000:]

    # Reshape the flat rows to (N, 3, 32, 32).
    X_trainnoaug = X_trainnoaug.reshape((-1, 3, 32, 32))
    X_test = X_test.reshape((-1, 3, 32, 32))
    X_val = X_val.reshape((-1, 3, 32, 32))
    X_val = resizeimages(X_val)
    X_test = resizeimages(X_test)

    # Prepare Theano variables for inputs, targets and precision settings.
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    bit_var = T.scalar('bits')
    prec_var = T.scalar('precision')
    epsilon = T.scalar()

    print("Building model and compiling functions...")
    if model != 'cifar':
        # Without a network nothing below can run; stop here instead of
        # failing later on an unbound ``network``.
        print("Unrecognized model type %r." % model)
        return
    network = build_model(input_var)
    layers = lasagne.layers.get_all_layers(network)
    # The limited-precision twin works on a deep copy of the layer stack up
    # to layers[-3], so its parameters are independent of ``network``.
    limnetwork = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(copy.deepcopy(layers[-3]), p=.5),
        num_units=10,
        nonlinearity=None)

    # Float reference: deterministic forward pass (dropout disabled).
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_prediction = theano.printing.Print("pred: ")(test_prediction)
    test_loss = lasagne.objectives.categorical_crossentropy(
        T.clip(test_prediction, 0.00000001, 1.0 - 0.00000001), target_var)
    test_loss = test_loss.mean()
    predprint = theano.printing.Print("pred")(T.argmax(test_prediction,
                                                       axis=1))
    test_acc = T.mean(T.eq(predprint, target_var),
                      dtype=theano.config.floatX)

    # Limited-precision evaluation: same loss/accuracy expressions, but on
    # the output of get_output_lim and clipped with a run-time epsilon.
    test_prediction_lim, debug = get_output_lim(limnetwork,
                                                bit_var,
                                                prec_var,
                                                input_var,
                                                deterministic=True,
                                                singleprec=singleprec)
    test_prediction_lim = theano.printing.Print("pred: ")(test_prediction_lim)
    test_loss_lim = lasagne.objectives.categorical_crossentropy(
        T.clip(test_prediction_lim, epsilon, 1.0 - epsilon), target_var)
    test_loss_lim = test_loss_lim.mean()
    predprint_lim = theano.printing.Print("pred")(T.argmax(
        test_prediction_lim, axis=1))
    test_acc_lim = T.mean(T.eq(predprint_lim, target_var),
                          dtype=theano.config.floatX)

    # Float validation function; compiled as before but not called below.
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc],
                             allow_input_downcast=True)
    val_fn_lim = theano.function(
        [input_var, target_var, bit_var, prec_var, epsilon],
        [test_loss_lim, test_acc_lim, debug],
        allow_input_downcast=True)

    print("Loading model...")
    with np.load('model9.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)
    lasagne.layers.set_all_param_values(limnetwork, param_values)

    # Hard-coded precision: 7 bits in total, 3 of them after the point.
    bit = 7
    prec = 3
    eps = 1.0 / (1 << prec)

    lim_params = lasagne.layers.get_all_params(limnetwork)
    lim_params = lim_precision_params(lim_params, bit, prec)
    print(lim_params[0])
    lasagne.layers.set_all_param_values(limnetwork, lim_params)

    # Only the first 100 validation samples are evaluated.
    val_inputs = lim_precision_inputs(X_val[:100], bit, prec)
    val_targets = y_val[:100]

    err, acc, debug = val_fn_lim(val_inputs, val_targets, bit, prec, eps)
    print("err: ", err, "--- acc: ", acc)
""" CIFAR10 CNN model """ import os import theano from theano import tensor, printing from theano.tensor.nnet import conv2d from theano.tensor.signal.pool import pool_2d import numpy import load #get training, testing data #training, testing data X_train, Y_train, X_test, Y_test= load.cifar10(dtype=theano.config.floatX) #data are flatten into array, reshape them back X_train=X_train.reshape(-1,3,32,32) X_test=X_test.reshape(-1,3,32,32) label_train=numpy.argmax(Y_train,axis=1) #define symbolic in theano x=tensor.tensor4() t=tensor.matrix() #CNN model #two conv followed by two max pool, then 2 fully connect def CNN(x,c_l1,c_l2,f_l1,f_l2): conv1=tensor.nnet.relu(conv2d(x,c_l1)) #default stride=1 --subsample=(1,1) pool1=pool_2d(conv1,(2,2),st=(2,2),ignore_border=True) #default maxpool
def main(train=False, train_lim=False, model='cifar', preproc=True,
         num_epochs=200, startepoch=199, bits=(16,), precision=((6,),),
         singleprec=False):
    """Evaluate the stored network at one hard-coded fixed-point precision.

    Builds the float network and a limited-precision twin, loads the weights
    from ``model9.npz`` into both, quantizes the twin's parameters and the
    first 100 validation samples with ``bit = 7`` / ``prec = 3``, and prints
    the resulting loss and accuracy.

    Args:
        train, train_lim, num_epochs, startepoch, bits, precision: accepted
            for interface compatibility; this function does not read them.
        model: network type; only ``'cifar'`` is recognized.
        preproc: load the data with ``load.cifar10pre`` when true, otherwise
            with ``load.cifar10``.
        singleprec: forwarded unchanged to ``get_output_lim``.

    Returns:
        None. For an unrecognized ``model`` a message is printed and the
        function returns early.
    """
    # Load the dataset
    print("Loading data...")
    if preproc:
        X_trainnoaug, y_trainnoaug, X_test, y_test = load.cifar10pre(
            dtype=theano.config.floatX, grayscale=False)
    else:
        X_trainnoaug, y_trainnoaug, X_test, y_test = load.cifar10(
            dtype=theano.config.floatX, grayscale=False)

    # The last 10000 training samples are held out as the validation set.
    X_trainnoaug, X_val = X_trainnoaug[:-10000], X_trainnoaug[-10000:]
    y_trainnoaug, y_val = y_trainnoaug[:-10000], y_trainnoaug[-10000:]

    # Reshape the flat rows to (N, 3, 32, 32).
    X_trainnoaug = X_trainnoaug.reshape((-1, 3, 32, 32))
    X_test = X_test.reshape((-1, 3, 32, 32))
    X_val = X_val.reshape((-1, 3, 32, 32))
    X_val = resizeimages(X_val)
    X_test = resizeimages(X_test)

    # Prepare Theano variables for inputs, targets and precision settings.
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    bit_var = T.scalar('bits')
    prec_var = T.scalar('precision')
    epsilon = T.scalar()

    print("Building model and compiling functions...")
    if model == 'cifar':
        network = build_model(input_var)
        layers = lasagne.layers.get_all_layers(network)
        # The limited-precision twin works on a deep copy of the layer stack
        # up to layers[-3], so its parameters are independent of ``network``.
        limnetwork = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(copy.deepcopy(layers[-3]), p=.5),
            num_units=10,
            nonlinearity=None)
    else:
        # Without a network nothing below can run; stop here instead of
        # failing later on an unbound ``network``.
        print("Unrecognized model type %r." % model)
        return

    # Float reference: deterministic forward pass (dropout disabled).
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_prediction = theano.printing.Print("pred: ")(test_prediction)
    test_loss = lasagne.objectives.categorical_crossentropy(
        T.clip(test_prediction, 0.00000001, 1.0 - 0.00000001), target_var)
    test_loss = test_loss.mean()
    predprint = theano.printing.Print("pred")(
        T.argmax(test_prediction, axis=1))
    test_acc = T.mean(T.eq(predprint, target_var),
                      dtype=theano.config.floatX)

    # Limited-precision evaluation: same loss/accuracy expressions, but on
    # the output of get_output_lim and clipped with a run-time epsilon.
    test_prediction_lim, debug = get_output_lim(
        limnetwork, bit_var, prec_var, input_var,
        deterministic=True, singleprec=singleprec)
    test_prediction_lim = theano.printing.Print("pred: ")(test_prediction_lim)
    test_loss_lim = lasagne.objectives.categorical_crossentropy(
        T.clip(test_prediction_lim, epsilon, 1.0 - epsilon), target_var)
    test_loss_lim = test_loss_lim.mean()
    predprint_lim = theano.printing.Print("pred")(
        T.argmax(test_prediction_lim, axis=1))
    test_acc_lim = T.mean(T.eq(predprint_lim, target_var),
                          dtype=theano.config.floatX)

    # Float validation function; compiled as before but not called below.
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc],
                             allow_input_downcast=True)
    val_fn_lim = theano.function(
        [input_var, target_var, bit_var, prec_var, epsilon],
        [test_loss_lim, test_acc_lim, debug],
        allow_input_downcast=True)

    print("Loading model...")
    with np.load('model9.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)
    lasagne.layers.set_all_param_values(limnetwork, param_values)

    # Hard-coded precision: 7 bits in total, 3 of them after the point.
    bit = 7
    prec = 3
    eps = 1.0 / (1 << prec)

    lim_params = lasagne.layers.get_all_params(limnetwork)
    lim_params = lim_precision_params(lim_params, bit, prec)
    print(lim_params[0])
    lasagne.layers.set_all_param_values(limnetwork, lim_params)

    # Only the first 100 validation samples are evaluated.
    val_inputs = lim_precision_inputs(X_val[:100], bit, prec)
    val_targets = y_val[:100]

    err, acc, debug = val_fn_lim(val_inputs, val_targets, bit, prec, eps)
    print("err: ", err, "--- acc: ", acc)
def _snapshot_params(network):
    """Return (deep-copied shared params, their current values) of *network*."""
    current_params = copy.deepcopy(lasagne.layers.get_all_params(network))
    old_params = [np.asarray(param.get_value(), dtype=theano.config.floatX)
                  for param in current_params]
    return current_params, old_params


def _quantize_outputs(values, bit, prec):
    """Round *values* to ``prec`` fractional bits, wrapping at ``1 << bit``.

    Non-negative entries wrap with a positive modulus; negative entries are
    negated, wrapped, and negated back. The arithmetic runs in float64 and
    the result is cast back to the dtype of *values*.
    """
    scale = 1 << prec
    modulus = 1 << bit
    wide = np.asarray(values, dtype=np.float64)
    rounded = np.trunc(wide * scale + 0.5)
    wrapped = np.where(wide >= 0, rounded % modulus, -((-rounded) % modulus))
    return np.asarray(wrapped * (1.0 / scale), dtype=values.dtype)


def _limited_precision_softmax(limlayers, inputs, bit, prec):
    """Run one batch through *limlayers* with fixed-point rounding applied."""
    output = lim_precision_inputs(inputs, bit, prec)
    for layernr, layer in enumerate(limlayers[1:], start=1):
        output = layer.get_output_for(output, deterministic=True)
        # Only the outputs of layers 1, 4, 7 and 8 are rounded in between.
        # NOTE(review): the modulo is taken before the +0.5 offset here,
        # unlike the final output quantization - confirm this is intended.
        if layernr in (1, 4, 7, 8):
            rounded = np.asarray(
                np.trunc((np.asarray(output.eval()) * (1 << prec))
                         % (1 << bit) + 0.5) * (1.0 / (1 << prec)),
                dtype=theano.config.floatX)
            output = theano.shared(rounded)
    lim_outputs = _quantize_outputs(np.asarray(output.eval()), bit, prec)
    softmax = lasagne.nonlinearities.softmax(lim_outputs).eval()
    # Use maximum precision (all ``bit`` bits) for the softmax output.
    return np.round(softmax * (1 << bit)) * (1.0 / (1 << bit))


def _evaluate_limited_precision(limnetwork, limlayers, current_params, X, y,
                                bits, precision, val_lim_prec, verbose=False):
    """Accumulate per-(bits, precision) loss and accuracy sums over (X, y)."""
    lim_err = np.zeros(np.array(precision).shape)
    lim_acc = np.zeros(np.array(precision).shape)
    for bit_idx, (bit, precisions) in enumerate(zip(bits, precision)):
        eps = 1.0 / (1 << bit)
        for prec_idx, prec in enumerate(precisions):
            if verbose:
                print(bit, prec)
            lim_params = lim_precision_params(current_params, bit, prec)
            lasagne.layers.set_all_param_values(limnetwork, lim_params)
            for inputs, targets in iterate_minibatches(X, y, 500,
                                                       shuffle=False):
                softmax = _limited_precision_softmax(limlayers, inputs,
                                                     bit, prec)
                err, acc = val_lim_prec(softmax, targets, eps)
                lim_err[bit_idx][prec_idx] += err
                lim_acc[bit_idx][prec_idx] += acc
    return lim_err, lim_acc


def main(train=False, model='cifar', num_epochs=200, startepoch=199,
         bits=(8, 16,),
         precision=((3, 4, 5, 6, 7, 8,), (3, 4, 5, 6, 7, 8,),),
         precisionchange=1):
    """Train or reload the network and evaluate it over a precision sweep.

    For every epoch in ``range(startepoch, num_epochs)`` the network is
    either trained (``train=True``) or reloaded from the stored per-epoch
    model file, then validated in floating point and for every
    (total bits, fractional bits) combination. The same is done once on the
    test set. Results are written to two CSV files under ``dataloc`` and
    plotted.

    Args:
        train: train and save the model each epoch when true; otherwise load
            the stored per-epoch models and ``finalmodel.npz``.
        model: network type; only ``'cifar'`` is recognized.
        num_epochs: exclusive upper bound of the epoch loop.
        startepoch: first epoch; when > 0 the parameters of epoch
            ``startepoch - 1`` are loaded first.
        bits: total bit widths, one per row of ``precision``.
        precision: for each entry of ``bits``, the fractional bit counts to
            evaluate; rows must have equal length.
        precisionchange: only used as part of the CSV file names.

    Returns:
        None. For an unrecognized ``model`` a message is printed and the
        function returns early.
    """
    dataloc = "Models/CIFAR10bis/"

    # Load the dataset
    print("Loading data...")
    X_trainnoaug, y_trainnoaug, X_test, y_test = load.cifar10(
        dtype=theano.config.floatX, grayscale=False)

    # The last 10000 training samples are held out as the validation set.
    X_trainnoaug, X_val = X_trainnoaug[:-10000], X_trainnoaug[-10000:]
    y_trainnoaug, y_val = y_trainnoaug[:-10000], y_trainnoaug[-10000:]

    # Reshape the flat rows to (N, 3, 32, 32).
    X_trainnoaug = X_trainnoaug.reshape((-1, 3, 32, 32))
    X_test = X_test.reshape((-1, 3, 32, 32))
    X_val = X_val.reshape((-1, 3, 32, 32))
    X_val = resizeimages(X_val)
    X_test = resizeimages(X_test)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    prediction_var = T.fmatrix('predictions')
    epsilon = T.scalar()

    print("Building model and compiling functions...")
    if model != 'cifar':
        # Without a network nothing below can run; stop here instead of
        # failing later on an unbound ``network``.
        print("Unrecognized model type %r." % model)
        return
    network = build_model(input_var)
    layers = lasagne.layers.get_all_layers(network)
    # limnetwork reuses the layer objects of ``network`` up to layers[-3],
    # so setting its parameters also overwrites those of ``network``.
    limnetwork = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(layers[-3], p=.5),
        num_units=10,
        nonlinearity=None)
    limlayers = lasagne.layers.get_all_layers(limnetwork)

    # Training loss: mean categorical cross-entropy.
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    # SGD with Nesterov momentum on all trainable parameters.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

    # Validation/test expressions use a deterministic forward pass
    # (dropout disabled).
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss_expr = lasagne.objectives.categorical_crossentropy(
        T.clip(test_prediction, 0.00000001, 1.0 - 0.00000001), target_var)
    test_loss_expr = test_loss_expr.mean()
    test_acc_expr = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                           dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss,
                               updates=updates, allow_input_downcast=True)
    val_fn = theano.function([input_var, target_var],
                             [test_loss_expr, test_acc_expr],
                             allow_input_downcast=True)

    # Loss/accuracy of an externally computed (limited-precision) prediction.
    limtest_loss = lasagne.objectives.categorical_crossentropy(
        T.clip(prediction_var, epsilon, 1.0 - epsilon), target_var)
    limtest_loss = limtest_loss.mean()
    limtest_acc = T.mean(T.eq(T.argmax(prediction_var, axis=1), target_var),
                         dtype=theano.config.floatX)
    val_lim_prec = theano.function([prediction_var, target_var, epsilon],
                                   [limtest_loss, limtest_acc],
                                   allow_input_downcast=True)

    # One row per epoch plus a final test row; one column per
    # (bits, precision) pair plus a float column. NaN marks rows that are
    # never filled (epochs before startepoch) instead of leaving
    # uninitialized memory in the CSV files and plots.
    result_shape = (num_epochs + 1, np.array(precision).size + 1)
    totalerr = np.full(result_shape, np.nan)
    totalacc = np.full(result_shape, np.nan)
    csv_prefix = dataloc + str(bits) + str(precision) + str(precisionchange)

    if startepoch > 0:
        with np.load(dataloc + 'model' + str(startepoch - 1) + '.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(network, param_values)

    # Finally, launch the training loop.
    print("Starting training...")
    for epoch in range(startepoch, num_epochs):
        start_time = time.time()
        if train:
            print("Augmenting data...")
            X_train, y_train = augmentdata(X_trainnoaug, y_trainnoaug)
            # In each epoch, we do a full pass over the training data:
            train_err = 0
            train_batches = 0
            for inputs, targets in iterate_minibatches(X_train, y_train, 500,
                                                       shuffle=True):
                train_err += train_fn(inputs, targets)
                train_batches += 1
            np.savez(dataloc + 'model' + str(epoch) + '.npz',
                     *lasagne.layers.get_all_param_values(network))
        else:
            with np.load(dataloc + 'model' + str(epoch) + '.npz') as f:
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            lasagne.layers.set_all_param_values(network, param_values)

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        current_params, old_params = _snapshot_params(network)
        print("Starting validation...")
        for inputs, targets in iterate_minibatches(X_val, y_val, 500,
                                                   shuffle=False):
            print("batchnr: ", val_batches + 1)
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        limval_err, limval_acc = _evaluate_limited_precision(
            limnetwork, limlayers, current_params, X_val, y_val,
            bits, precision, val_lim_prec, verbose=True)
        # Undo the quantized weights written through the shared layers.
        lasagne.layers.set_all_param_values(network, old_params)

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        if train:
            print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print(" validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))
        for bit_idx, (bit, precisions) in enumerate(zip(bits, precision)):
            for prec_idx, prec in enumerate(precisions):
                print(str(bit) + " bits precision, ", str(prec),
                      " bits after point")
                print(" limval loss:\t\t{:.6f}".format(
                    limval_err[bit_idx][prec_idx] / val_batches))
                print(" limval accuracy:\t\t{:.2f} %".format(
                    limval_acc[bit_idx][prec_idx] / val_batches * 100))

        # Row layout: all limited-precision results, then the float result.
        totalerr[epoch] = np.concatenate(
            (limval_err.ravel() / val_batches, [val_err / val_batches]))
        totalacc[epoch] = np.concatenate(
            (limval_acc.ravel() / val_batches * 100,
             [val_acc / val_batches * 100]))
        np.savetxt(csv_prefix + "err.csv", totalerr, delimiter=",")
        np.savetxt(csv_prefix + "acc.csv", totalacc, delimiter=",")

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    if train:
        np.savez(dataloc + 'finalmodel.npz',
                 *lasagne.layers.get_all_param_values(network))
    else:
        with np.load(dataloc + 'finalmodel.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(network, param_values)
    current_params, old_params = _snapshot_params(network)
    for inputs, targets in iterate_minibatches(X_test, y_test, 500,
                                               shuffle=False):
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1

    # Every test batch is unpacked inside the helper, so no stale batch from
    # the loop above is reused.
    limtest_err, limtest_acc = _evaluate_limited_precision(
        limnetwork, limlayers, current_params, X_test, y_test,
        bits, precision, val_lim_prec)

    print("Final results:")
    print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print(" test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))
    for bit_idx, (bit, precisions) in enumerate(zip(bits, precision)):
        for prec_idx, prec in enumerate(precisions):
            print(str(bit) + " bits precision, ", str(prec),
                  " bits after point")
            print(" limtest loss:\t\t\t{:.6f}".format(
                limtest_err[bit_idx][prec_idx] / test_batches))
            print(" limtest accuracy:\t\t{:.2f} %".format(
                limtest_acc[bit_idx][prec_idx] / test_batches * 100))

    # The last row of the tables holds the test results.
    totalerr[-1] = np.concatenate(
        (limtest_err.ravel() / test_batches, [test_err / test_batches]))
    totalacc[-1] = np.concatenate(
        (limtest_acc.ravel() / test_batches * 100,
         [test_acc / test_batches * 100]))
    np.savetxt(csv_prefix + "err.csv", totalerr, delimiter=",")
    np.savetxt(csv_prefix + "acc.csv", totalacc, delimiter=",")

    # Plot loss (top, log scale) and accuracy (bottom) per table row.
    x = np.arange(0, num_epochs + 1, 1)
    plt.figure(1)
    ax = plt.subplot(211)
    print(totalerr.shape)
    for errtot in np.transpose(totalerr)[:-1]:
        plt.plot(x, errtot)
    plt.plot(x, np.transpose(totalerr)[-1])
    plt.xlabel('epochs', fontsize=18)
    plt.ylabel('loss', fontsize=16)
    ax.set_yscale('log')
    plt.subplot(212)
    print(totalacc.shape)
    # One legend entry per column, in the same row-major order as the table.
    names = [str(bits[i]) + " total bits, " + str(prec) + " fraction bits"
             for i, precisions in enumerate(precision)
             for prec in precisions]
    for acctot, name in zip(np.transpose(totalacc)[:-1], names):
        plt.plot(x, acctot, label=name)
    plt.plot(x, np.transpose(totalacc)[-1], label='float32')
    plt.xlabel('epochs', fontsize=18)
    plt.ylabel('accuracy', fontsize=16)
    plt.legend(bbox_to_anchor=(0.6, 0.3), loc=2, borderaxespad=0.)
    plt.show()
import theano
import theano.tensor as T
import numpy as np
import load
from theano.tensor.nnet.conv import conv2d
from theano.tensor.signal.downsample import max_pool_2d

# Fetch the colour (non-grayscale) train/test splits through the project
# loader; the test targets are reduced to one label index per row.
x_train, t_train, x_test, t_test = load.cifar10(dtype=theano.config.floatX,
                                                grayscale=False)
labels_test = np.argmax(t_test, axis=1)

# Turn each flat row into a (3, 32, 32) array.
n_train = x_train.shape[0]
n_test = x_test.shape[0]
x_train = x_train.reshape((n_train, 3, 32, 32))
x_test = x_test.reshape((n_test, 3, 32, 32))

# Symbolic placeholders: 4-D inputs and 2-D targets.
x = T.tensor4()
t = T.matrix()


# Model: neural network
def floatX(x):
    """Return *x* as an ndarray using Theano's configured float dtype."""
    target_dtype = theano.config.floatX
    return np.asarray(x, dtype=target_dtype)


def init_weights(shape):
    """Create a shared variable of the given shape, drawn from N(0, 0.1**2)."""
    initial_values = np.random.randn(*shape) * 0.1
    return theano.shared(floatX(initial_values))