def jobman_entrypoint(state, channel):
    # record mercurial versions of each package
    pylearn.version.record_versions(state, [theano, ift6266, pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset, so
    # reduce it to fewer elements if asked to.
    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    if state.has_key('decrease_lr'):
        decrease_lr = state['decrease_lr']
    else:
        decrease_lr = 0

    if state.has_key('decrease_lr_pretrain'):
        dec = state['decrease_lr_pretrain']
    else:
        dec = 0

    n_ins = 32 * 32

    if state.has_key('subdataset'):
        subdataset_name = state['subdataset']
    else:
        subdataset_name = SUBDATASET_NIST

    # n_outs = 62  # 10 digits, 26*2 (lower, capitals)
    if subdataset_name == "upper":
        n_outs = 26
        subdataset = datasets.nist_upper()
        examples_per_epoch = NIST_UPPER_TRAIN_SIZE
    elif subdataset_name == "lower":
        n_outs = 26
        subdataset = datasets.nist_lower()
        examples_per_epoch = NIST_LOWER_TRAIN_SIZE
    elif subdataset_name == "digits":
        n_outs = 10
        subdataset = datasets.nist_digits()
        examples_per_epoch = NIST_DIGITS_TRAIN_SIZE
    else:
        n_outs = 62
        subdataset = datasets.nist_all()
        examples_per_epoch = NIST_ALL_TRAIN_SIZE

    print 'Using subdataset', subdataset_name

    # Make sure these variables exist outside the if statements below
    PATH = ''
    nom_reptrain = ''
    nom_serie = ""
    if state['pretrain_choice'] == 0:
        nom_serie = "series_NIST.h5"
    elif state['pretrain_choice'] == 1:
        nom_serie = "series_P07.h5"

    series = create_series(state.num_hidden_layers, nom_serie)

    print "Creating optimizer with state, ", state

    optimizer = SdaSgdOptimizer(dataset_name=subdataset_name,
                                dataset=subdataset,
                                hyperparameters=state,
                                n_ins=n_ins, n_outs=n_outs,
                                examples_per_epoch=examples_per_epoch,
                                series=series,
                                max_minibatches=rtt)

    parameters = []
    # Number of files of P07 used for pretraining
    nb_file = 0

    print('\n\tpretraining with NIST\n')

    optimizer.pretrain(subdataset, decrease=dec)

    channel.save()

    # Set some of the parameters used for the finetuning
    if state.has_key('finetune_set'):
        finetune_choice = state['finetune_set']
    else:
        finetune_choice = FINETUNE_SET

    if state.has_key('max_finetuning_epochs'):
        max_finetune_epoch_NIST = state['max_finetuning_epochs']
    else:
        max_finetune_epoch_NIST = MAX_FINETUNING_EPOCHS

    if state.has_key('max_finetuning_epochs_P07'):
        max_finetune_epoch_P07 = state['max_finetuning_epochs_P07']
    else:
        max_finetune_epoch_P07 = max_finetune_epoch_NIST

    # Decide how the finetuning is done
    if finetune_choice == 0:
        print('\n\n\tfinetune with NIST\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(subdataset, subdataset, max_finetune_epoch_NIST,
                           ind_test=1, decrease=decrease_lr)
        channel.save()
    if finetune_choice == 1:
        print('\n\n\tfinetune with P07\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(),
                           max_finetune_epoch_P07, ind_test=0, decrease=decrease_lr)
        channel.save()
    if finetune_choice == 2:
        print('\n\n\tfinetune with P07 followed by NIST\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(),
                           max_finetune_epoch_P07, ind_test=20, decrease=decrease_lr)
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=21, decrease=decrease_lr)
        channel.save()
    if finetune_choice == 3:
        print('\n\n\tfinetune with NIST only on the logistic regression on top'
              ' (but validation on P07).\n'
              '\tAll hidden unit outputs are inputs of the logistic regression\n\n')
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=1, special=1,
                           decrease=decrease_lr)

    if finetune_choice == -1:
        print('\nSERIES OF 4 DIFFERENT FINETUNINGS')
        print('\n\n\tfinetune with NIST\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=1, decrease=decrease_lr)
        channel.save()

        print('\n\n\tfinetune with P07\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(),
                           max_finetune_epoch_P07, ind_test=0, decrease=decrease_lr)
        channel.save()

        print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_finetune_P07.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=21, decrease=decrease_lr)
        channel.save()

        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n'
              '\tAll hidden unit outputs are inputs of the logistic regression\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(),
                           max_finetune_epoch_NIST, ind_test=1, special=1,
                           decrease=decrease_lr)
        channel.save()

    channel.save()

    return channel.COMPLETE
args = sys.argv[1:]

if len(args) > 0 and args[0] == 'sigmoid':
    type = 0
elif len(args) > 0 and args[0] == 'tanh':
    type = 1

part = 2   # 0=train, 1=valid, 2=test
PATH = ''  # Can be changed if the model is not in the current directory

if os.path.exists(PATH + 'params_finetune_NIST.txt'):
    start_time = time.clock()
    print ('\n finetune = NIST ')
    print "NIST DIGITS"
    test_data(PATH + 'params_finetune_NIST.txt', datasets.nist_digits(), part=part, type=type)
    print "NIST LOWER CASE"
    test_data(PATH + 'params_finetune_NIST.txt', datasets.nist_lower(), part=part, type=type)
    print "NIST UPPER CASE"
    test_data(PATH + 'params_finetune_NIST.txt', datasets.nist_upper(), part=part, type=type)
    end_time = time.clock()
    print ('It took %f minutes' % ((end_time - start_time) / 60.))

if os.path.exists(PATH + 'params_finetune_P07.txt'):
    start_time = time.clock()
    print ('\n finetune = P07 ')
    print "NIST DIGITS"
    test_data(PATH + 'params_finetune_P07.txt', datasets.nist_digits(), part=part, type=type)
    print "NIST LOWER CASE"
    test_data(PATH + 'params_finetune_P07.txt', datasets.nist_lower(), part=part, type=type)
def log_reg(learning_rate=0.13, nb_max_examples=1000000, batch_size=50,
            dataset=datasets.nist_digits(), image_size=32 * 32, nb_class=10,
            patience=5000, patience_increase=2, improvement_threshold=0.995):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear model.

    This is demonstrated on NIST digits.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type nb_max_examples: int
    :param nb_max_examples: maximal number of examples seen while running the optimizer

    :type batch_size: int
    :param batch_size: size of the minibatch

    :type dataset: dataset
    :param dataset: a dataset instance from ift6266.datasets

    :type image_size: int
    :param image_size: size of the input image in pixels (width * height), 32 * 32 = 1024

    :type nb_class: int
    :param nb_class: number of classes

    :type patience: int
    :param patience: look at this many examples regardless

    :type patience_increase: int
    :param patience_increase: wait this much longer when a new best is found

    :type improvement_threshold: float
    :param improvement_threshold: a relative improvement of this much is considered significant
    """
    #--------------------------------------------------------------------------
    # Build actual model
    #--------------------------------------------------------------------------

    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    # construct the logistic regression class
    classifier = LogisticRegression(input=x, n_in=image_size, n_out=nb_class)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling Theano functions that compute the mistakes made by the model
    # on a minibatch
    test_model = theano.function(inputs=[x, y], outputs=classifier.errors(y))
    validate_model = theano.function(inputs=[x, y], outputs=classifier.errors(y))

    # compute the gradient of cost with respect to theta = (W, b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a dictionary
    updates = {classifier.W: classifier.W - learning_rate * g_W,
               classifier.b: classifier.b - learning_rate * g_b}

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(inputs=[x, y], outputs=cost, updates=updates)

    #--------------------------------------------------------------------------
    # Train model
    #--------------------------------------------------------------------------

    print '... training the model'

    # early-stopping parameters
    # note: these re-assignments override the values passed as keyword arguments
    patience = 5000                # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = patience * 0.5  # go through this many minibatches
                                           # before checking the network on the
                                           # validation set

    best_params = None
    best_validation_loss = float('inf')
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    n_iters = nb_max_examples / batch_size
    epoch = 0
    iter = 0

    while (iter < n_iters) and (not done_looping):
        epoch = epoch + 1
        for x, y in dataset.train(batch_size):

            minibatch_avg_cost = train_model(x, y)
            # iteration number
            iter += 1

            if iter % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(xv, yv)
                                     for xv, yv in dataset.valid(batch_size)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, iter %i, validation error %f %%' %
                      (epoch, iter, this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_losses = [test_model(xt, yt)
                                   for xt, yt in dataset.test(batch_size)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, iter %i, test error of best '
                           'model %f %%') % (epoch, iter, test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print('The code ran for %f minutes' % ((end_time - start_time) / 60.))

    return best_validation_loss, test_score, iter * batch_size, (end_time - start_time) / 60.
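
# Illustrative usage (not part of the original file): a quick way to smoke-test
# log_reg() with a reduced example budget. The keyword names match the signature
# above; the values here are assumptions chosen only for demonstration.
#
#   best_valid, test_err, n_seen, minutes = log_reg(learning_rate=0.13,
#                                                   nb_max_examples=100000,
#                                                   batch_size=50,
#                                                   dataset=datasets.nist_digits())
#   print 'valid %f %%, test %f %%' % (best_valid * 100., test_err * 100.)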
def jobman_entrypoint(state, channel):
    # record mercurial versions of each package
    pylearn.version.record_versions(state, [theano, ift6266, pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset, so
    # reduce it to fewer elements if asked to.
    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    n_ins = 32 * 32
    n_outs = 62  # 10 digits, 26*2 (lower, capitals)
    examples_per_epoch = NIST_ALL_TRAIN_SIZE

    PATH = ''
    NIST_BY_CLASS = 0

    print "Creating optimizer with state, ", state

    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(),
                                hyperparameters=state,
                                n_ins=n_ins, n_outs=n_outs,
                                examples_per_epoch=examples_per_epoch,
                                max_minibatches=rtt)

    # The original repeated the same evaluation block once per finetuned model
    # file; the repetition is folded into a single loop (behavior unchanged).
    finetuned_models = [
        ('params_finetune_NIST.txt',              'NIST'),
        ('params_finetune_P07.txt',               'P07'),
        ('params_finetune_NIST_then_P07.txt',     'NIST then P07'),
        ('params_finetune_P07_then_NIST.txt',     'P07 then NIST'),
        ('params_finetune_PNIST07.txt',           'PNIST07'),
        ('params_finetune_PNIST07_then_NIST.txt', 'PNIST07 then NIST'),
    ]

    for param_file, label in finetuned_models:
        if not os.path.exists(PATH + param_file):
            continue

        print ('\n finetune = %s ' % label)
        optimizer.reload_parameters(PATH + param_file)

        if NIST_BY_CLASS == 1:
            print "NIST DIGITS"
            optimizer.training_error(datasets.nist_digits(), part=2)
            print "NIST LOWER CASE"
            optimizer.training_error(datasets.nist_lower(), part=2)
            print "NIST UPPER CASE"
            optimizer.training_error(datasets.nist_upper(), part=2)
        else:
            print "P07 valid"
            optimizer.training_error(datasets.nist_P07(), part=1)
            print "PNIST valid"
            optimizer.training_error(datasets.PNIST07(), part=1)

    channel.save()

    return channel.COMPLETE
def test_error(model_file):

    print((' test error on all NIST'))
    # load the model
    a = numpy.load(model_file)
    W1 = a['W1']
    W2 = a['W2']
    b1 = a['b1']
    b2 = a['b2']
    configuration = a['config']
    # configuration = [learning_rate, nb_max_exemples, nb_hidden, adaptive_lr]
    learning_rate = configuration[0]
    nb_max_exemples = configuration[1]
    nb_hidden = configuration[2]
    adaptive_lr = configuration[3]

    if len(configuration) == 6:
        detection_mode = configuration[4]
        reduce_label = configuration[5]
    else:
        detection_mode = 0
        reduce_label = 0

    # define the batch size
    batch_size = 20
    # define the number of target classes
    nb_targets = 62

    # create the MLP
    ishape = (32, 32)  # this is the size of NIST images

    # allocate symbolic variables for the data
    x = T.fmatrix()   # the data is presented as rasterized images
    y = T.lvector()   # the labels are presented as a 1D vector of [long int] labels

    # construct the MLP
    classifier = MLP(input=x,
                     n_in=32 * 32,
                     n_hidden=nb_hidden,
                     n_out=nb_targets,
                     learning_rate=learning_rate,
                     detection_mode=detection_mode)

    # load the saved weights into the model
    classifier.W1.value = W1
    classifier.b1.value = b1
    classifier.W2.value = W2
    classifier.b2.value = b2

    # compile a Theano function that computes the mistakes made by the model
    # on a minibatch
    test_model = theano.function([x, y], classifier.errors(y))

    # evaluate on the test set of each NIST subset; the original repeated the
    # same block for ALL, digits, lower and upper, folded here into one loop
    # (behavior unchanged)
    for name, dataset in [('NIST ALL',    datasets.nist_all()),
                          ('NIST digits', datasets.nist_digits()),
                          ('NIST lower',  datasets.nist_lower()),
                          ('NIST upper',  datasets.nist_upper())]:
        test_score = 0.
        temp = 0
        for xt, yt in dataset.test(batch_size):
            if reduce_label:
                yt[yt > 35] = yt[yt > 35] - 26
            test_score += test_model(xt, yt)
            temp = temp + 1
        test_score /= temp
        print((' test error %s : %f %%') % (name, test_score * 100.0))
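
# Illustrative usage (not part of the original file): test_error() expects an
# .npz archive containing 'W1', 'b1', 'W2', 'b2' and 'config', presumably
# produced by the corresponding training script. The filename is hypothetical.
#
#   test_error('mlp_all_nist.npz')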
def jobman_entrypoint(state, channel):
    global TEST_RUN
    minibatch_size = state.minibatch_size

    print_every = 100000
    COMPUTE_ERROR_EVERY = 10**7 / minibatch_size  # compute error every 10 million examples
    if TEST_RUN:
        print_every = 100
        COMPUTE_ERROR_EVERY = 1000 / minibatch_size

    print "entrypoint, state is"
    print state

    ######################
    # select dataset and dataset subset, plus adjust epoch count to make the
    # number of examples seen independent of the dataset
    # example: for the DIGITS_ONLY case the number of epochs must be changed,
    # and for plain NIST (no transformations) it must be multiplied by 100,
    # since we do not have the variations
    # compute this in terms of the P07 dataset size (=80M)

    MINIBATCHES_TO_SEE = state.n_epochs * 8 * (10**6) / minibatch_size

    if state.train_on == 'NIST' and state.train_subset == 'ALL':
        dataset_obj = datasets.nist_all()
    elif state.train_on == 'NIST' and state.train_subset == 'DIGITS_ONLY':
        dataset_obj = datasets.nist_digits()
    elif state.train_on == 'NISTP' and state.train_subset == 'ALL':
        dataset_obj = datasets.PNIST07()
    elif state.train_on == 'NISTP' and state.train_subset == 'DIGITS_ONLY':
        dataset_obj = PNIST07_digits
    elif state.train_on == 'P07' and state.train_subset == 'ALL':
        dataset_obj = datasets.nist_P07()
    elif state.train_on == 'P07' and state.train_subset == 'DIGITS_ONLY':
        dataset_obj = datasets.P07_digits

    dataset = dataset_obj

    if state.train_subset == 'ALL':
        n_classes = 62
    elif state.train_subset == 'DIGITS_ONLY':
        n_classes = 10
    else:
        raise NotImplementedError()

    ###############################
    # construct model

    print "constructing model..."
    x = T.matrix('x')
    y = T.ivector('y')

    rng = numpy.random.RandomState(state.rng_seed)

    # construct the MLP class
    model = MLP(rng=rng, input=x, n_in=N_INPUTS,
                n_hidden_layers=state.n_hidden_layers,
                n_hidden=state.n_hidden, n_out=n_classes)

    # cost and training fn
    cost = T.mean(model.negative_log_likelihood(y)) \
           + state.L1_reg * model.L1 \
           + state.L2_reg * model.L2_sqr

    print "L1, L2:", state.L1_reg, state.L2_reg

    gradient_nll_wrt_params = []
    for param in model.params:
        gparam = T.grad(cost, param)
        gradient_nll_wrt_params.append(gparam)

    learning_rate = 10 ** float(state.learning_rate_log10)
    print "Learning rate", learning_rate

    train_updates = {}
    for param, gparam in zip(model.params, gradient_nll_wrt_params):
        train_updates[param] = param - learning_rate * gparam

    train_fn = theano.function([x, y], cost, updates=train_updates)

    #######################
    # create series

    basedir = os.getcwd()

    h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w")

    series = {}
    add_error_series(series, "training_error", h5f,
                     index_names=('minibatch_idx',), use_accumulator=True,
                     reduce_every=REDUCE_EVERY)

    ##########################
    # training loop

    start_time = time.clock()

    print "begin training..."
    print "will train for", MINIBATCHES_TO_SEE, "minibatches"

    mb_idx = 0

    # the recovered source compared against an undefined `nb_max_exemples`;
    # the intended bound is the MINIBATCHES_TO_SEE computed above
    while mb_idx < MINIBATCHES_TO_SEE:

        last_costs = []

        for mb_x, mb_y in dataset.train(minibatch_size):
            if TEST_RUN and mb_idx > 1000:
                break

            last_cost = train_fn(mb_x, mb_y)
            series["training_error"].append((mb_idx,), last_cost)

            last_costs.append(last_cost)
            if (len(last_costs) + 1) % print_every == 0:
                print "Mean over last", print_every, "minibatches:", numpy.mean(last_costs)
                last_costs = []

            if (mb_idx + 1) % COMPUTE_ERROR_EVERY == 0:
                # compute errors
                print "computing errors on all datasets..."
                print "Time since training began:", (time.clock() - start_time) / 60., "minutes"
                compute_and_save_errors(state, model, series, h5f, mb_idx)
                channel.save()
                sys.stdout.flush()

            # advance the global minibatch counter (the increment was missing
            # from the recovered source but is required by the bounds above)
            mb_idx += 1

    end_time = time.clock()

    print "-" * 80
    print "Finished. Training took", (end_time - start_time) / 60., "minutes"
    print state
                     index_names=('iter',), title='Test error (class)')

    return series


class PrintSeries(object):
    def append(self, idx, v):
        print idx, v


if __name__ == '__main__':
    from ift6266 import datasets
    from sgd_opt import sgd_opt
    import sys, time

    batch_size = 100
    dset = datasets.nist_digits(1000)

    pretrain_funcs, trainf, evalf, net = build_funcs(
        img_size=(32, 32),
        batch_size=batch_size, filter_sizes=[(5, 5), (3, 3)],
        num_filters=[20, 4], subs=[(2, 2), (2, 2)], noise=[0.2, 0.2],
        mlp_sizes=[500], out_size=10, dtype=numpy.float32,
        pretrain_lr=0.001, train_lr=0.1)

    t_it = repeat_itf(dset.train, batch_size)
    pretrain_fs, train, valid, test = massage_funcs(
        t_it, t_it, dset, batch_size,
        pretrain_funcs, trainf, evalf)

    print "pretraining ...",
    sys.stdout.flush()