def generate_batches():
    # Generate a directory containing NUM_BATCHES batches of DATASET
    total = NUM_BATCHES * BATCH_SIZE

    # Create a matrix of random integers within the range
    # [0, length_dataset - 1]
    f = open(DATASET_PATH[DATASET][0])
    g = open(DATASET_PATH[DATASET][1])
    test_data = ft.read(f)
    test_labels = ft.read(g)
    resulting_data = numpy.zeros((total, IMGSHP[0] * IMGSHP[1]))
    resulting_labels = numpy.zeros((total,))
    f.close(); g.close()

    ds_size = len(test_data)
    rand_seq = numpy.random.random_integers(ds_size - 1,
                                            size=(NUM_BATCHES, BATCH_SIZE))

    for i in range(NUM_BATCHES):
        for j in range(BATCH_SIZE):
            resulting_data[i * BATCH_SIZE + j] = test_data[rand_seq[i, j]]
            resulting_labels[i * BATCH_SIZE + j] = test_labels[rand_seq[i, j]]
        image = generate_image(resulting_data[i * BATCH_SIZE:(i + 1) * BATCH_SIZE])
        text = generate_labels(resulting_labels[i * BATCH_SIZE:(i + 1) * BATCH_SIZE],
                               rand_seq[i])
        filename = DATASET + '_' + str("%04d" % int(i + 1))
        image.save(filename + '.jpeg')
        save_text(text, filename)

    ft_name = 'AMT_' + DATASET + '_' + str(NUM_BATCHES)
    generate_ft_file(resulting_data, resulting_labels, ft_name)
def __init__(self, seed=9854):
    # These 4 variables give the size of the "crop" taken from image2.
    # The crop is taken starting at image1[15,15], the centre of image1.
    self.haut = 2
    self.bas = 2
    self.gauche = 2
    self.droite = 2

    # These two variables give the displacement in x and y relative to the
    # middle of the left or right border.
    self.x_arrivee = 0
    self.y_arrivee = 0

    # This variable is 1 if the fragment is placed on the left, -1 if on the
    # right, and 0 if it is placed in the centre (but made paler).
    self.endroit = -1

    # Opacity of the added fragment when it is placed in the centre.
    # Completely arbitrary; can be changed if desired.
    self.opacite = 0.5

    # Says whether anything is done at all: 0 = do nothing, 1 = apply the occlusion.
    self.appliquer = 1

    self.seed = seed
    #numpy.random.seed(self.seed)

    # Must be on the DIRO network; otherwise fix the path (see the commented line).
    f3 = open('/data/lisa/data/ift6266h10/echantillon_occlusion.ft')
    #f3 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/echantillon_occlusion.ft')
    w = ft.read(f3)
    f3.close()
    self.longueur = len(w)
    self.d = (w.astype('float')) / 255
def __init__(self, seed=1256):
    self.angle = 0           # Rotation angle in degrees (between 0 and 180)
    self.numero = 0          # Index of the '1' chosen from the bank of '1's
    self.gauche = -1         # Leftmost column containing the '1'
    self.droite = -1
    self.haut = -1
    self.bas = -1
    self.faire = 1           # 1 = apply the transformation, 0 = do nothing

    self.crop_haut = 0
    self.crop_gauche = 0     # Both values lie between 0 and 31 and define where
                             # the crop is taken inside the image of the '1'

    self.largeur_bande = -1  # Width of the stroke
    self.smooth = -1         # Width of the square matrix used for erosion
    self.nb_ratures = -1     # Number of scratches applied

    self.fini = 0            # 1 = all layers have been applied, 0 = not finished
    self.complexity = 0      # Keeps the complexity in memory when several layers are needed

    self.seed = seed
    #numpy.random.seed(self.seed)

    # Must be on the DIRO network; otherwise fix the path (see the commented line).
    f3 = open('/data/lisa/data/ift6266h10/un_rature.ft')
    #f3 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/un_rature.ft')
    w = ft.read(f3)
    f3.close()
    self.d = (w.astype('float')) / 255
    self.patch = self.d[0].reshape((32, 32))  # Scratch patch that will be applied to the image
def read_test_data(mlp_model):
    # read the test data and labels
    h = open(data_path + test_data)
    i = open(data_path + test_labels)
    raw_test_data = ft.read(h)
    raw_test_labels = ft.read(i)
    i.close()
    h.close()

    # read the chosen model
    a = np.load(mlp_model)
    W1 = a['W1']
    W2 = a['W2']
    b1 = a['b1']
    b2 = a['b2']

    return (W1, b1, W2, b2, raw_test_data, raw_test_labels)
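# A minimal sketch (not part of the original file) of how the tuple returned by
# read_test_data might be used to score the saved two-layer MLP on the test set.
# The tanh hidden layer, the argmax decision rule and the /255. input
# normalization are assumptions about the model, not facts taken from the code above.
import numpy as np

def mlp_test_error(W1, b1, W2, b2, raw_test_data, raw_test_labels):
    X = raw_test_data.astype('float32') / 255.     # assumed input normalization
    H = np.tanh(np.dot(X, W1) + b1)                # assumed hidden activation
    out = np.dot(H, W2) + b2                       # output scores per class
    pred = np.argmax(out, axis=1)                  # predicted class per example
    return np.mean(pred != raw_test_labels)        # misclassification rate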
def load_tfd(path):
    '''
    import scipy.io as io
    data = io.loadmat(os.path.join(path, 'TFD_48x48.mat'))
    X = cast32(data['images']) / cast32(255)
    X = X.reshape((X.shape[0], X.shape[1] * X.shape[2]))
    labels = data['labs_ex'].flatten()
    labeled = labels != -1
    unlabeled = labels == -1
    train_X = X[unlabeled]
    valid_X = X[unlabeled][:100]  # Stuff
    test_X = X[labeled]
    del data
    '''
    import pylearn.io.filetensor as ft
    F = open(os.path.join(path, 'TFD_48x48.ft'), 'r')
    train_X = ft.read(F)
    train_Y = ft.read(F)
    valid_X = ft.read(F)
    valid_Y = ft.read(F)
    test_X = ft.read(F)
    test_Y = ft.read(F)
    return (train_X, train_Y), (valid_X, valid_Y), (test_X, test_Y)
def load(cls, which_set, desc):
    assert desc in ['dat', 'cat', 'info']

    base = '%s/norb_small/original/smallnorb-' % os.getenv('PYLEARN2_DATA_PATH')
    if which_set == 'train':
        base += '5x46789x9x18x6x2x96x96-training'
    else:
        base += '5x01235x9x18x6x2x96x96-testing'

    fp = open(base + '-%s.mat' % desc, 'r')
    data = filetensor.read(fp)
    fp.close()

    return data
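# Hedged usage sketch for the class method above.  `SmallNORB` stands in for
# whatever class defines load(); the expectation that the 'dat' tensor has shape
# (24300, 2, 96, 96) (stereo pairs of 96x96 images) is the usual small NORB
# layout and is an assumption, not something stated in the code.
train_images = SmallNORB.load('train', 'dat')        # raw stereo image tensor
train_categories = SmallNORB.load('train', 'cat')    # object category labels
print train_images.shape, train_categories.shape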
def test_rbm(b_size=20, nhidden=1000, kk=1, persistance=0):
    """
    Demonstrate ***

    This is demonstrated on MNIST.

    :param learning_rate: learning rate used for training the RBM
    :param training_epochs: number of epochs used for training
    :param dataset: path to the pickled dataset
    """
    learning_rate = 0.1

    # if data_set == 0:
    #     datasets = datasets.nist_all()
    # elif data_set == 1:
    #     datasets = datasets.nist_P07()
    # elif data_set == 2:
    #     datasets = datasets.PNIST07()

    data_path = '/data/lisa/data/nist/by_class/'
    f = open(data_path + 'all/all_train_data.ft')
    g = open(data_path + 'all/all_train_labels.ft')
    h = open(data_path + 'all/all_test_data.ft')
    i = open(data_path + 'all/all_test_labels.ft')

    train_set_x_uint8 = theano.shared(ft.read(f))
    test_set_x_uint8 = theano.shared(ft.read(h))

    train_set_x = T.cast(train_set_x_uint8 / 255., theano.config.floatX)
    train_set_y = ft.read(g)
    test_set_x = T.cast(test_set_x_uint8 / 255., theano.config.floatX)
    test_set_y = ft.read(i)

    f.close()
    g.close()
    i.close()
    h.close()

    #t = len(train_set_x)
    # revisit how the data is retrieved
    ## dataset = load_data(dataset)
    ##
    ## train_set_x, train_set_y = datasets[0]
    ## test_set_x,  test_set_y  = datasets[2]

    training_epochs = 1   # to be determined
    batch_size = b_size   # size of the minibatch

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x_uint8.value.shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()   # index to a [mini]batch
    x = T.matrix('x')     # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # construct the RBM class
    rbm = RBM(input=x, n_visible=32 * 32,
              n_hidden=nhidden, numpy_rng=rng, theano_rng=theano_rng)

    # initialize storage for the persistent chain (state = hidden layer of chain)
    if persistance == 1:
        persistent_chain = theano.shared(numpy.zeros((batch_size, 500)))
        # get the cost and the gradient corresponding to one step of CD
        cost, updates = rbm.cd(lr=learning_rate, persistent=persistent_chain, k=kk)
    else:
        # get the cost and the gradient corresponding to one step of CD
        #persistance_chain = None
        cost, updates = rbm.cd(lr=learning_rate, persistent=None, k=kk)

    #################################
    #     Training the RBM          #
    #################################
    #os.chdir('~')
    dirname = str(persistance) + '_' + str(nhidden) + '_' + str(b_size) + '_' + str(kk)
    os.makedirs(dirname)
    os.chdir(dirname)
    print 'yes'

    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    print type(batch_size)
    print index.dtype
    train_rbm = theano.function([index], cost,
                                updates=updates,
                                givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})
    print 'yep'

    plotting_time = 0.0
    start_time = time.clock()
    bufsize = 1000

    # go through training epochs
    costs = []
    for epoch in xrange(training_epochs):

        # go through the training set
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            mean_cost += [train_rbm(batch_index)]
        # for mini_x, mini_y in datasets.train(b_size):
        #     mean_cost += [train_rbm(mini_x)]
        ##    learning_rate = learning_rate - 0.0001
        ##    learning_rate = learning_rate/(tau+(epoch*batch_index*batch_size))
        #learning_rate = learning_rate/10

        costs.append(numpy.mean(mean_cost))

        # Plot filters after each training epoch
        plotting_start = time.clock()
        # Construct image from the weight matrix
        image = PIL.Image.fromarray(tile_raster_images(X=rbm.W.value.T,
                                                       img_shape=(32, 32),
                                                       tile_shape=(10, 10),
                                                       tile_spacing=(1, 1)))
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = time.clock()
        plotting_time += (plotting_stop - plotting_start)

    end_time = time.clock()

    pretraining_time = (end_time - start_time) - plotting_time

    #################################
    #     Sampling from the RBM     #
    #################################

    # find out the number of test samples
    #number_of_test_samples = 100
    number_of_test_samples = test_set_x.value.shape[0]

    #test_set_x, test_y = datasets.test(100*b_size)
    # pick random test examples, with which to initialize the persistent chain
    test_idx = rng.randint(number_of_test_samples - b_size)
    persistent_vis_chain = theano.shared(test_set_x.value[test_idx:test_idx + b_size])

    # define one step of Gibbs sampling (mf = mean-field)
    [hid_mf, hid_sample, vis_mf, vis_sample] = rbm.gibbs_vhv(persistent_vis_chain)

    # the sample at the end of the channel is returned by ``gibbs_1`` as
    # its second output; note that this is computed as a binomial draw,
    # therefore it is formed of ints (0 and 1) and therefore needs to
    # be converted to the same dtype as ``persistent_vis_chain``
    vis_sample = T.cast(vis_sample, dtype=theano.config.floatX)

    # construct the function that implements our persistent chain
    # we generate the "mean field" activations for plotting and the actual samples
    # for reinitializing the state of our persistent chain
    sample_fn = theano.function([], [vis_mf, vis_sample],
                                updates={persistent_vis_chain: vis_sample})

    # sample the RBM, plotting every `plot_every`-th sample; do this
    # until you plot at least `n_samples`
    n_samples = 10
    # run minibatch-size chains for Gibbs samples (number of negative particles)
    plot_every = b_size

    for idx in xrange(n_samples):

        # do `plot_every` intermediate samplings of which we do not care
        for jdx in xrange(plot_every):
            vis_mf, vis_sample = sample_fn()

        # construct image
        image = PIL.Image.fromarray(tile_raster_images(X=vis_mf,
                                                       img_shape=(32, 32),
                                                       tile_shape=(10, 10),
                                                       tile_spacing=(1, 1)))
        #print ' ... plotting sample ', idx
        image.save('sample_%i_step_%i.png' % (idx, idx * jdx))

    # save the model
    model = [rbm.W, rbm.vbias, rbm.hbias]
    f = open('params.txt', 'w')
    cPickle.dump(model, f, protocol=-1)
    f.close()
    #os.chdir('./..')

    return numpy.mean(costs), pretraining_time * 36
Testing on : digits

Total entries      : 102.0
Turks per batch    : 3
Average test error : 50.9803921569%
Error variance     : 1.33333333333%
"""

import csv, numpy, re, decimal

from ift6266 import datasets
from pylearn.io import filetensor as ft

fnist = open('nist_train_class_freq.ft', 'r')
fp07 = open('p07_train_class_freq.ft', 'r')
fpnist = open('pnist_train_class_freq.ft', 'r')

nist_freq_table = ft.read(fnist)
p07_freq_table = ft.read(fp07)
pnist_freq_table = ft.read(fpnist)

fnist.close(); fp07.close(); fpnist.close()

DATASET_PATH = {'nist': '/data/lisa/data/ift6266h10/amt_data/nist/',
                'p07': '/data/lisa/data/ift6266h10/amt_data/p07/',
                'pnist': '/data/lisa/data/ift6266h10/amt_data/pnist/'}

freq_tables = {'nist': nist_freq_table,
               'p07': p07_freq_table,
               'pnist': pnist_freq_table}

CVSFILE = None
'''
Script that computes the proportion of digits, lowercase letters and
uppercase letters in NIST train and NIST test.

Sylvain Pannetier Lebeuf, for IFT6266, winter 2010
'''

from pylearn.io import filetensor as ft
import matplotlib.pyplot as plt

#f1 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/all_train_labels.ft')
f1 = open('/data/lisa/data/nist/by_class/all/all_train_labels.ft')
train = ft.read(f1)
#f2 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/all_test_labels.ft')
f2 = open('/data/lisa/data/nist/by_class/all/all_test_labels.ft')
test = ft.read(f2)
f1.close()
f2.close()

# The 6 counters
train_c = 0
train_min = 0
train_maj = 0
test_c = 0
test_min = 0
test_maj = 0
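# Hedged sketch (not in the original script) of the counting loop that these six
# counters set up.  The label encoding assumed here (0-9 for digits, 10-35 for
# uppercase letters, 36-61 for lowercase letters) is the usual NIST by_class
# convention in this project, but it is an assumption, not read from the files.
for y in train:
    if y < 10:
        train_c += 1      # digit ("chiffre")
    elif y < 36:
        train_maj += 1    # uppercase letter ("majuscule")
    else:
        train_min += 1    # lowercase letter ("minuscule")

for y in test:
    if y < 10:
        test_c += 1
    elif y < 36:
        test_maj += 1
    else:
        test_min += 1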
def load_data_labels(self, filenames, pair):
    for i, fn in enumerate(filenames):
        f = open(os.path.join(self.basepath, fn))
        pair[i] = filetensor.read(f)
        f.close()
# Directory that contains the NIST data.
# The following directory works if you are connected to a machine on the DIRO
# network.
datapath = '/data/lisa/data/nist/by_class/'

# The .ft file contains the NIST digits in an efficient format.  The digits are
# stored in an NxD matrix, where N is the number of images and D is the number
# of pixels per image (32x32 = 1024).  Each pixel of the image is a value
# between 0 and 255 corresponding to a grey level.  The values are stored as
# uint8, i.e. as bytes.
f = open(datapath + 'digits/digits_train_data.ft')
# Check that you have enough memory to load the whole dataset into RAM.
# Otherwise, use ft.arraylike, a class built specifically for files that you do
# not want to load into RAM.
d = ft.read(f)

# Display an image
pylab.imshow(d[0].reshape((32, 32)))
pylab.show()

# NB: do not forget to divide the pixel values by 255. if you use the data as
# input to a neural network and want inputs between 0 and 1.

# digits_train_data.ft contains the images, digits_train_labels.ft contains the
# labels
f = open(datapath + 'digits/digits_train_labels.ft')
labels = ft.read(f)
print 'label: ', labels[0]
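# Small hedged follow-up to the normalization note above: converting the uint8
# pixels to floats in [0, 1] before feeding them to a neural network.
d_float = d.astype('float32') / 255.
print 'pixel range after normalization:', d_float.min(), d_float.max()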
def sgd_optimization_nist(learning_rate=0.01,
                          n_iter=300, n_code_layer=400,
                          complexity=0.1):
    """
    Demonstrate stochastic gradient descent optimization for a denoising
    autoencoder.

    This is demonstrated on MNIST.

    :param learning_rate: learning rate used (factor for the stochastic gradient)
    :param pretraining_epochs: number of epochs to do pretraining
    :param pretrain_lr: learning rate to be used during pre-training
    :param n_iter: maximal number of iterations to run the optimizer
    """
    # open file to save the validation and test curve
    filename = 'lr_' + str(learning_rate) + 'ni_' + str(n_iter) + \
               'nc_' + str(n_code_layer) + 'c_' + str(complexity) + '.txt'
    result_file = open(filename, 'w')

    data_path = '/data/lisa/data/nist/by_class/'
    f = open(data_path + 'all/all_train_data.ft')
    g = open(data_path + 'all/all_train_labels.ft')
    h = open(data_path + 'all/all_test_data.ft')
    i = open(data_path + 'all/all_test_labels.ft')

    train_set_x = ft.read(f)
    train_set_y = ft.read(g)
    test_set_x = ft.read(h)
    test_set_y = ft.read(i)

    f.close()
    g.close()
    i.close()
    h.close()

    # make minibatches of size 20
    batch_size = 20   # size of the minibatch

    # Create a validation set the same size as the test set.
    # Use the end of the training array for this purpose.
    # Discard the last remaining examples so we get a multiple of batch_size.
    test_size = len(test_set_y)
    test_size = int(test_size / batch_size)
    test_size *= batch_size
    train_size = len(train_set_x)
    train_size = int(train_size / batch_size)
    train_size *= batch_size
    validation_size = test_size
    offset = train_size - test_size
    if True:
        print 'train size = %d' % train_size
        print 'test size = %d' % test_size
        print 'valid size = %d' % validation_size
        print 'offset = %d' % offset

    #train_set = (train_set_x, train_set_y)
    train_batches = []
    for i in xrange(0, train_size - test_size, batch_size):
        train_batches = train_batches + \
            [(train_set_x[i:i + batch_size], train_set_y[i:i + batch_size])]

    test_batches = []
    for i in xrange(0, test_size, batch_size):
        test_batches = test_batches + \
            [(test_set_x[i:i + batch_size], test_set_y[i:i + batch_size])]

    valid_batches = []
    for i in xrange(0, test_size, batch_size):
        valid_batches = valid_batches + \
            [(train_set_x[offset + i:offset + i + batch_size],
              train_set_y[offset + i:offset + i + batch_size])]

    ishape = (32, 32)   # this is the size of NIST images

    # allocate symbolic variables for the data
    x = T.fmatrix()   # the data is presented as rasterized images
    y = T.lvector()   # the labels are presented as a 1D vector of [long int] labels

    # construct the denoising autoencoder class
    n_ins = 32 * 32
    encoder = dA(n_ins, n_code_layer, complexity,
                 input=x.reshape((batch_size, n_ins)))

    # Train autoencoder

    # compute gradients of the layer parameters
    gW = T.grad(encoder.cost, encoder.W)
    gb = T.grad(encoder.cost, encoder.b)
    gb_prime = T.grad(encoder.cost, encoder.b_prime)

    # compute the updated value of the parameters after one step
    updated_W = encoder.W - gW * learning_rate
    updated_b = encoder.b - gb * learning_rate
    updated_b_prime = encoder.b_prime - gb_prime * learning_rate

    # define the function that evaluates the symbolic description of
    # one update step
    train_model = theano.function([x], encoder.cost,
                                  updates={encoder.W: updated_W,
                                           encoder.b: updated_b,
                                           encoder.b_prime: updated_b_prime})

    # compile a theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function([x], encoder.cost)

    normalize = numpy.asarray(255, dtype=theano.config.floatX)

    n_minibatches = len(train_batches)

    # early-stopping parameters
    patience = 10000000 / batch_size   # look at this many examples regardless
    patience_increase = 2              # wait this much longer when a new best is found
    improvement_threshold = 0.995      # a relative improvement of this much is
                                       # considered significant
    validation_frequency = n_minibatches   # go through this many minibatches before
                                           # checking the network on the validation
                                           # set; in this case we check every epoch

    best_params = None
    best_validation_loss = float('inf')
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    # have a maximum of `n_iter` iterations through the entire dataset
    for iter in xrange(n_iter * n_minibatches):

        # get epoch and minibatch index
        epoch = iter / n_minibatches
        minibatch_index = iter % n_minibatches

        # get the minibatches corresponding to `iter` modulo
        # `len(train_batches)`
        x, y = train_batches[minibatch_index]
        '''
        if iter == 0:
            b = numpy.asarray(255, dtype=theano.config.floatX)
            x = x / b
            print x
            print y
            print x.__class__
            print x.shape
            print x.dtype.name
            print y.dtype.name
            print x.min(), x.max()
        '''
        cost_ij = train_model(x / normalize)

        if (iter + 1) % validation_frequency == 0:
            # compute zero-one loss on validation set
            this_validation_loss = 0.
            for x, y in valid_batches:
                # sum up the errors for each minibatch
                this_validation_loss += test_model(x / normalize)
            # get the average by dividing by the number of minibatches
            this_validation_loss /= len(valid_batches)

            print('epoch %i, minibatch %i/%i, validation error %f ' %
                  (epoch, minibatch_index + 1, n_minibatches,
                   this_validation_loss))

            # save value in file
            result_file.write(str(epoch) + ' ' + str(this_validation_loss) + '\n')

            # if we got the best validation score until now
            if this_validation_loss < best_validation_loss:

                # improve patience if loss improvement is good enough
                if this_validation_loss < best_validation_loss * \
                        improvement_threshold:
                    patience = max(patience, iter * patience_increase)

                best_validation_loss = this_validation_loss
                best_iter = iter

                # test it on the test set
                test_score = 0.
                for x, y in test_batches:
                    test_score += test_model(x / normalize)
                test_score /= len(test_batches)
                print(('     epoch %i, minibatch %i/%i, test error of best '
                       'model %f ') %
                      (epoch, minibatch_index + 1, n_minibatches, test_score))

        if patience <= iter:
            print('iter (%i) exceeds patience (%i). break' % (iter, patience))
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, '
           'with test performance %f ') %
          (best_validation_loss, test_score))
    print('The code ran for %f minutes' % ((end_time - start_time) / 60.))

    result_file.close()

    return (best_validation_loss, test_score, (end_time - start_time) / 60,
            best_iter)
def __init__(self):
    data_path = '/data/lisa/data/nist/by_class/'

    digits_train_data = 'digits/digits_train_data.ft'
    digits_train_labels = 'digits/digits_train_labels.ft'
    digits_test_data = 'digits/digits_test_data.ft'
    digits_test_labels = 'digits/digits_test_labels.ft'
    lower_train_data = 'lower/lower_train_data.ft'
    lower_train_labels = 'lower/lower_train_labels.ft'
    lower_test_data = 'lower/lower_test_data.ft'
    lower_test_labels = 'lower/lower_test_labels.ft'
    upper_train_data = 'upper/upper_train_data.ft'
    upper_train_labels = 'upper/upper_train_labels.ft'
    upper_test_data = 'upper/upper_test_data.ft'
    upper_test_labels = 'upper/upper_test_labels.ft'
    test_data = 'all/all_test_data.ft'
    test_labels = 'all/all_test_labels.ft'

    print 'Opening data...'

    f_digits_train_data = open(data_path + digits_train_data)
    f_digits_train_labels = open(data_path + digits_train_labels)
    f_digits_test_data = open(data_path + digits_test_data)
    f_digits_test_labels = open(data_path + digits_test_labels)
    f_lower_train_data = open(data_path + lower_train_data)
    f_lower_train_labels = open(data_path + lower_train_labels)
    f_lower_test_data = open(data_path + lower_test_data)
    f_lower_test_labels = open(data_path + lower_test_labels)
    f_upper_train_data = open(data_path + upper_train_data)
    f_upper_train_labels = open(data_path + upper_train_labels)
    f_upper_test_data = open(data_path + upper_test_data)
    f_upper_test_labels = open(data_path + upper_test_labels)
    #f_test_data = open(data_path + test_data)
    #f_test_labels = open(data_path + test_labels)

    self.raw_digits_train_data = ft.read(f_digits_train_data)
    self.raw_digits_train_labels = ft.read(f_digits_train_labels)
    self.raw_digits_test_data = ft.read(f_digits_test_data)
    self.raw_digits_test_labels = ft.read(f_digits_test_labels)
    self.raw_lower_train_data = ft.read(f_lower_train_data)
    self.raw_lower_train_labels = ft.read(f_lower_train_labels)
    self.raw_lower_test_data = ft.read(f_lower_test_data)
    self.raw_lower_test_labels = ft.read(f_lower_test_labels)
    self.raw_upper_train_data = ft.read(f_upper_train_data)
    self.raw_upper_train_labels = ft.read(f_upper_train_labels)
    self.raw_upper_test_data = ft.read(f_upper_test_data)
    self.raw_upper_test_labels = ft.read(f_upper_test_labels)
    #self.raw_test_data = ft.read(f_test_data)
    #self.raw_test_labels = ft.read(f_test_labels)

    f_digits_train_data.close()
    f_digits_train_labels.close()
    f_digits_test_data.close()
    f_digits_test_labels.close()
    f_lower_train_data.close()
    f_lower_train_labels.close()
    f_lower_test_data.close()
    f_lower_test_labels.close()
    f_upper_train_data.close()
    f_upper_train_labels.close()
    f_upper_test_data.close()
    f_upper_test_labels.close()
    #f_test_data.close()
    #f_test_labels.close()

    print 'Data opened'
Used to create a small dataset of letter fragments for adding occlusion noise.

Sylvain Pannetier Lebeuf, for IFT6266, winter 2010
'''

from pylearn.io import filetensor as ft
import pylab
import random as r
from numpy import *

nombre = 20000   # size of the sample

f = open('all_train_data.ft')   # the dataset is stored locally
d = ft.read(f)
f.close()
print len(d)

random.seed(3525)

echantillon = r.sample(xrange(len(d)), nombre)

nouveau = d[0:nombre]
for i in xrange(nombre):
    nouveau[i] = d[echantillon[i]]

f2 = open('echantillon_occlusion.ft', 'w')
ft.write(f2, nouveau)
f2.close()
def experiment(state, channel):
    print 'LOADING MODEL CONFIG'
    config_path = '/' + os.path.join(*state.model_path.split('/'))
    print state.model_path
    if 'config' in os.listdir(config_path):
        config_file = open(os.path.join(config_path, 'config'), 'r')
        config = config_file.readlines()
        try:
            config_vals = config[0].split('(')[1:][0].split(')')[:-1][0].split(', ')
        except:
            config_vals = config[0][3:-1].replace(': ', '=').replace("'", "").split(', ')
        config_vals = filter(lambda x: not 'jobman' in x and not '/' in x
                             and not ':' in x and not 'experiment' in x,
                             config_vals)
        for CV in config_vals:
            print CV
            try:
                exec('state.' + CV) in globals(), locals()
            except:
                exec('state.' + CV.split('=')[0] + "='" + CV.split('=')[1] + "'") in globals(), locals()
    else:
        import pdb; pdb.set_trace()

    # LOAD DATA
    if 'mnist' in state.data_path:
        (train_X, train_Y), (valid_X, valid_Y), (test_X, test_Y) = load_mnist(state.data_path)
        train_X = numpy.concatenate((train_X, valid_X))
    elif 'TFD' in state.data_path:
        (train_X, train_Y), (valid_X, valid_Y), (test_X, test_Y) = load_tfd(state.data_path)

    N_input = train_X.shape[1]
    root_N_input = numpy.sqrt(N_input)

    #train_X = binarize(train_X)
    #valid_X = binarize(valid_X)
    #test_X = binarize(test_X)
    numpy.random.seed(1)
    numpy.random.shuffle(train_X)
    train_X = theano.shared(train_X)
    valid_X = theano.shared(valid_X)
    test_X = theano.shared(test_X)
    # shuffle Y also if necessary

    # THEANO VARIABLES
    X = T.fmatrix()
    index = T.lscalar()
    MRG = RNG_MRG.MRG_RandomStreams(1)

    # SPECS
    K = state.K
    N = state.N
    layer_sizes = [N_input] + [state.hidden_size] * K
    learning_rate = theano.shared(cast32(state.learning_rate))
    annealing = cast32(state.annealing)
    momentum = theano.shared(cast32(state.momentum))

    # PARAMETERS

    # weights
    weights_list = [get_shared_weights(layer_sizes[i], layer_sizes[i + 1],
                                       numpy.sqrt(6. / (layer_sizes[i] + layer_sizes[i + 1])),
                                       'W')
                    for i in range(K)]
    bias_list = [get_shared_bias(layer_sizes[i], 'b') for i in range(K + 1)]

    # LOAD PARAMS
    print 'Loading model params...',
    print 'Loading last epoch...',
    param_files = filter(lambda x: x.endswith('ft'), os.listdir(config_path))
    max_epoch = numpy.argmax([int(x.split('_')[-1].split('.')[0]) for x in param_files])

    params_to_load = os.path.join(config_path, param_files[max_epoch])
    F = open(params_to_load, 'r')

    n_params = len(weights_list) + len(bias_list)
    print param_files[max_epoch]

    for i in range(0, len(weights_list)):
        weights_list[i].set_value(ft.read(F))
    for i in range(len(bias_list)):
        bias_list[i].set_value(ft.read(F))
    print 'Model parameters loaded!!'

    # functions
    def dropout(IN, p=0.5):
        noise = MRG.binomial(p=p, n=1, size=IN.shape, dtype='float32')
        OUT = (IN * noise) / cast32(p)
        return OUT

    def add_gaussian_noise(IN, std=1):
        print 'GAUSSIAN NOISE : ', std
        noise = MRG.normal(avg=0, std=std, size=IN.shape, dtype='float32')
        OUT = IN + noise
        return OUT

    def corrupt_input(IN, p=0.5):
        # salt and pepper? masking?
        noise = MRG.binomial(p=p, n=1, size=IN.shape, dtype='float32')
        IN = IN * noise
        return IN

    def salt_and_pepper(IN, p=0.2):
        # salt and pepper noise
        print 'DAE uses salt and pepper noise'
        a = MRG.binomial(size=IN.shape, n=1, p=1 - p, dtype='float32')
        b = MRG.binomial(size=IN.shape, n=1, p=0.5, dtype='float32')
        c = T.eq(a, 0) * b
        return IN * a + c

    def update_odd_layers(hiddens, noisy):
        for i in range(1, K + 1, 2):
            print i
            if noisy:
                simple_update_layer(hiddens, None, i)
            else:
                simple_update_layer(hiddens, None, i, mul_noise=False, add_noise=False)

    # we can append the reconstruction at each step
    def update_even_layers(hiddens, p_X_chain, autoregression, noisy):
        for i in range(0, K + 1, 2):
            print i
            if noisy:
                simple_update_layer(hiddens, p_X_chain, i, autoregression)
            else:
                simple_update_layer(hiddens, p_X_chain, i, autoregression,
                                    mul_noise=False, add_noise=False)

    def simple_update_layer(hiddens, p_X_chain, i, autoregression=False,
                            mul_noise=True, add_noise=True):
        # Compute the dot product, whatever the layer
        post_act_noise = 0

        if i == 0:
            hiddens[i] = T.dot(hiddens[i + 1], weights_list[i].T) + bias_list[i]

        elif i == K:
            hiddens[i] = T.dot(hiddens[i - 1], weights_list[i - 1]) + bias_list[i]

            # TODO compute d h_i / d h_(i-1)
            # derivative of h[i] with respect to h[i-1]
            # W is what transpose...

            if state.scaled_noise:
                # to remove this, remove the post_act_noise variable initialisation
                # and the following block, and put back post-activation noise like
                # it was (just normal calling of the function)
                W = weights_list[i - 1]
                hn = T.tanh(hiddens[i])
                ww = T.dot(W.T, W)
                s = (cast32(1) - hn ** 2)
                jj = ww * s.dimshuffle(0, 'x', 1) * s.dimshuffle(0, 1, 'x')
                scale_noise = lambda alpha: (alpha.dimshuffle(0, 1, 'x') * jj).sum(1)

                print 'SCALED_NOISE!!!, Last layer : set add_noise to False, add its own scaled noise'
                add_noise = False

                #pre_act_noise = MRG.normal(avg=0, std=std, size=hn.shape, dtype='float32')
                post_act_noise = MRG.normal(avg=0, std=state.hidden_add_noise_sigma,
                                            size=hn.shape, dtype='float32')

                #pre_act_noise = scale_noise(pre_act_noise)
                post_act_noise = scale_noise(post_act_noise)

                #hiddens[i] += pre_act_noise

        else:
            # next layer     : layers[i+1], assigned weights : W_i
            # previous layer : layers[i-1], assigned weights : W_(i-1)
            hiddens[i] = T.dot(hiddens[i + 1], weights_list[i].T) + \
                         T.dot(hiddens[i - 1], weights_list[i - 1]) + bias_list[i]

        # Add pre-activation noise if NOT input layer
        if i == 1 and state.noiseless_h1:
            print '>>NO noise in first layer'
            add_noise = False

        # pre-activation noise
        if i != 0 and add_noise and not state.scaled_noise:
            print 'Adding pre-activation gaussian noise'
            hiddens[i] = add_gaussian_noise(hiddens[i], state.hidden_add_noise_sigma)

        # ACTIVATION!
        if i == 0:
            print 'Sigmoid units'
            hiddens[i] = T.nnet.sigmoid(hiddens[i])
        else:
            print 'Hidden units'
            hiddens[i] = hidden_activation(hiddens[i])

        # post-activation noise
        if i != 0 and add_noise:
            print 'Adding post-activation gaussian noise'
            if state.scaled_noise:
                hiddens[i] += post_act_noise
            else:
                hiddens[i] = add_gaussian_noise(hiddens[i], state.hidden_add_noise_sigma)

        # POST ACTIVATION NOISE
        if i != 0 and mul_noise and state.hidden_dropout:
            # dropout if hidden
            print 'Dropping out', state.hidden_dropout
            hiddens[i] = dropout(hiddens[i], state.hidden_dropout)
        elif i == 0:
            # if input layer -> append p(X|...)
            p_X_chain.append(hiddens[i])

            # sample from p(X|...)
            if state.input_sampling:
                print 'Sampling from input'
                sampled = MRG.binomial(p=hiddens[i], size=hiddens[i].shape, dtype='float32')
            else:
                print '>>NO input sampling'
                sampled = hiddens[i]
            # add noise
            sampled = salt_and_pepper(sampled, state.input_salt_and_pepper)

            # set input layer
            hiddens[i] = sampled

    def update_layers(hiddens, p_X_chain, autoregression, noisy=True):
        print 'odd layer update'
        update_odd_layers(hiddens, noisy)
        print
        print 'even layer update'
        update_even_layers(hiddens, p_X_chain, autoregression, noisy)

    ''' F PROP '''
    #X = T.fmatrix()
    if state.act == 'sigmoid':
        print 'Using sigmoid activation'
        hidden_activation = T.nnet.sigmoid
    elif state.act == 'rectifier':
        print 'Using rectifier activation'
        hidden_activation = lambda x: T.maximum(cast32(0), x)
    elif state.act == 'tanh':
        hidden_activation = lambda x: T.tanh(x)

    ''' Corrupt X '''
    X_corrupt = salt_and_pepper(X, state.input_salt_and_pepper)

    f_noise = theano.function(inputs=[X],
                              outputs=salt_and_pepper(X, state.input_salt_and_pepper))

    ''' Commented for now (unless we need more denoising stuff)
    #############
    # Denoise some numbers : show number, noisy number, reconstructed number
    #############
    import random as R
    R.seed(1)
    random_idx = numpy.array(R.sample(range(len(test_X.get_value())), 100))
    numbers = test_X.get_value()[random_idx]

    noisy_numbers = f_noise(test_X.get_value()[random_idx])

    # Recompile the graph without noise for the reconstruction function
    hiddens_R = [X]
    p_X_chain_R = []

    for w, b in zip(weights_list, bias_list[1:]):
        # init with zeros
        hiddens_R.append(T.zeros_like(T.dot(hiddens_R[-1], w)))

    # The layer update scheme
    for i in range(2 * N * K):
        update_layers(hiddens_R, p_X_chain_R, noisy=False, autoregression=state.autoregression)

    f_recon = theano.function(inputs=[X], outputs=p_X_chain_R[-1])
    '''

    ##################################
    # Sampling, round 2 motherf***** #
    ##################################

    # the input to the sampling function
    network_state_input = [X] + [T.fmatrix() for i in range(K)]

    'first input will be a noisy number and zeros at the hidden layer, is this correct?'

    # "Output" state of the network (noisy)
    # initialized with input, then we apply updates
    #network_state_output = network_state_input
    # WTFFFF why is it not the same? f*****g python list = list not the same as list = list(list) ???
    network_state_output = [X] + network_state_input[1:]

    visible_pX_chain = []

    #for i in range(2 * N * K):
    #    update_layers(network_state_output, visible_pX_chain, noisy=True, autoregression=False)

    # ONE update
    update_layers(network_state_output, visible_pX_chain, noisy=True, autoregression=False)

    # WHY IS THERE A WARNING????
    # because the first odd layers are not used -> directly computed FROM THE EVEN layers
    f_sample2 = theano.function(inputs=network_state_input,
                                outputs=network_state_output + visible_pX_chain,
                                on_unused_input='warn')

    def sampling_wrapper(NSI):
        out = f_sample2(*NSI)
        NSO = out[:len(network_state_output)]
        vis_pX_chain = out[len(network_state_output):]
        return NSO, vis_pX_chain

    def sample_some_numbers(n_digits=400):
        to_sample = time.time()
        # The network's initial state
        #init_vis = test_X.get_value()[:1]
        init_vis = test_X[:1]

        noisy_init_vis = f_noise(init_vis)

        network_state = [[noisy_init_vis] +
                         [numpy.zeros((1, len(b.get_value())), dtype='float32')
                          for b in bias_list[1:]]]

        visible_chain = [init_vis]
        noisy_h0_chain = [noisy_init_vis]

        for i in range(n_digits - 1):
            # feed the last state into the network, compute new state, and
            # obtain visible units expectation chain
            net_state_out, vis_pX_chain = sampling_wrapper(network_state[-1])

            # append to the visible chain
            visible_chain += vis_pX_chain

            # append state output to the network state chain
            network_state.append(net_state_out)
            noisy_h0_chain.append(net_state_out[0])

        print 'Took ' + str(time.time() - to_sample) + ' to sample ' + str(n_digits) + ' digits'
        return numpy.vstack(visible_chain), numpy.vstack(noisy_h0_chain)

    def plot_samples(epoch_number):
        V, H0 = sample_some_numbers()
        img_samples = PIL.Image.fromarray(tile_raster_images(V, (root_N_input, root_N_input), (20, 20)))
        fname = 'samples_epoch_' + str(epoch_number) + '.png'
        img_samples.save(fname)

    def save_params(n, params):
        fname = 'params_epoch_' + str(n) + '.ft'
        f = open(fname, 'w')
        for p in params:
            ft.write(f, p.get_value(borrow=True))
        f.close()

    def plot_one_digit(digit):
        plot_one = PIL.Image.fromarray(tile_raster_images(digit, (root_N_input, root_N_input), (1, 1)))
        fname = 'one_digit.png'
        plot_one.save(fname)
        os.system('eog one_digit.png')

    def inpainting(digit):
        # The network's initial state

        # NOISE INIT
        init_vis = cast32(numpy.random.uniform(size=digit.shape))

        #noisy_init_vis = f_noise(init_vis)
        #noisy_init_vis = cast32(numpy.random.uniform(size=init_vis.shape))

        # INDEXES FOR VISIBLE AND NOISY PART
        noise_idx = (numpy.arange(N_input) % root_N_input < (root_N_input / 2))
        fixed_idx = (numpy.arange(N_input) % root_N_input > (root_N_input / 2))

        # function to re-init the visible to the same noise
        # FUNCTION TO RESET HALF VISIBLE TO DIGIT
        def reset_vis(V):
            V[0][fixed_idx] = digit[0][fixed_idx]
            return V

        # INIT DIGIT : NOISE and RESET HALF TO DIGIT
        init_vis = reset_vis(init_vis)

        network_state = [[init_vis] +
                         [numpy.zeros((1, len(b.get_value())), dtype='float32')
                          for b in bias_list[1:]]]

        visible_chain = [init_vis]
        noisy_h0_chain = [init_vis]

        for i in range(49):
            # feed the last state into the network, compute new state, and
            # obtain visible units expectation chain
            net_state_out, vis_pX_chain = sampling_wrapper(network_state[-1])

            # reset half the digit
            net_state_out[0] = reset_vis(net_state_out[0])
            vis_pX_chain[0] = reset_vis(vis_pX_chain[0])

            # append to the visible chain
            visible_chain += vis_pX_chain

            # append state output to the network state chain
            network_state.append(net_state_out)
            noisy_h0_chain.append(net_state_out[0])

        return numpy.vstack(visible_chain), numpy.vstack(noisy_h0_chain)

    #V_inpaint, H_inpaint = inpainting(test_X.get_value()[:1])
    #plot_one = PIL.Image.fromarray(tile_raster_images(V_inpaint, (root_N_input, root_N_input), (1, 50)))
    #fname = 'test.png'
    #plot_one.save(fname)
    #os.system('eog test.png')

    # get all digits, and do it a couple of times
    test_X = test_X.get_value()
    #test_Y = test_Y.get_value()
    numpy.random.seed(1)
    test_idx = numpy.arange(len(test_Y))

    for Iter in range(10):

        numpy.random.shuffle(test_idx)
        test_X = test_X[test_idx]
        test_Y = test_Y[test_idx]

        digit_idx = [(test_Y == i).argmax() for i in range(10)]
        inpaint_list = []

        for idx in digit_idx:
            DIGIT = test_X[idx:idx + 1]
            V_inpaint, H_inpaint = inpainting(DIGIT)
            inpaint_list.append(V_inpaint)

        INPAINTING = numpy.vstack(inpaint_list)

        plot_inpainting = PIL.Image.fromarray(tile_raster_images(INPAINTING,
                                                                 (root_N_input, root_N_input),
                                                                 (10, 50)))

        fname = 'inpainting_' + str(Iter) + '.png'
        plot_inpainting.save(fname)

        if False and __name__ == "__main__":
            os.system('eog inpainting.png')

    # PARZEN
    # Generating 10000 samples
    samples, _ = sample_some_numbers(n_digits=10000)
    Mean, Std = main(state.sigma_parzen, samples, test_X)

    #plot_samples(999)
    #sample_numbers(counter, [])

    if __name__ == '__main__':
        return Mean, Std
        #import ipdb; ipdb.set_trace()

    return channel.COMPLETE
def load_mat(fname, save_dir=''):
    print >> sys.stderr, 'loading ndarray from file: ', save_dir + fname
    file_handle = open(os.path.join(save_dir, fname), 'r')
    rval = filetensor.read(file_handle)
    file_handle.close()
    return rval
def _load_image():
    # the dataset is stored locally
    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')
    d = ft.read(f)
    w = numpy.asarray(d[0:1000])
    return (w / 255.0).astype('float')
'''
Creation of the OCR train, valid and test sets.

The valid set is trainorig[:80000],
the test set is trainorig[80000:160000],
the train set is trainorig[160000:].

trainorig is already shuffled.
'''

from pylearn.io import filetensor as ft
import numpy, os

dir1 = '/data/lisa/data/ocr_breuel/filetensor/'
dir2 = "/data/lisa/data/ift6266h10/"

f = open(dir1 + 'unlv-corrected-2010-02-01-shuffled.ft')
d = ft.read(f)
f = open(dir2 + "ocr_valid_data.ft", 'wb')
ft.write(f, d[:80000])
f = open(dir2 + "ocr_test_data.ft", 'wb')
ft.write(f, d[80000:160000])
f = open(dir2 + "ocr_train_data.ft", 'wb')
ft.write(f, d[160000:])

f = open(dir1 + 'unlv-corrected-2010-02-01-labels-shuffled.ft')
d = ft.read(f)
f = open(dir2 + "ocr_valid_labels.ft", 'wb')
ft.write(f, d[:80000])
f = open(dir2 + "ocr_test_labels.ft", 'wb')
ft.write(f, d[80000:160000])
f = open(dir2 + "ocr_train_labels.ft", 'wb')
ft.write(f, d[160000:])
def load_mat(fname, save_dir=''):
    print 'loading ndarray from file: ', save_dir + fname
    file_handle = open(os.path.join(save_dir, fname), 'r')
    rval = filetensor.read(file_handle)
    file_handle.close()
    return rval