def main():
    dataset_file = 'mnist.npz'
    if not isfile(dataset_file):
        downloadMnist(dataset_file)

    data = dict(np.load(dataset_file))
    data['x_tr'] = data['x_tr'] * 2.0 - 1.0
    data['x_va'] = data['x_va'] * 2.0 - 1.0
    data['x_te'] = data['x_te'] * 2.0 - 1.0
    data['t_tr'] = data['t_tr'].astype(np.int32)
    data['t_va'] = data['t_va'].astype(np.int32)
    data['t_te'] = data['t_te'].astype(np.int32)

    rng = np.random.RandomState()
    srng = RandomStreamsGPU(rng.randint(1, 2147462579, size=(6,)))

    # Setup data loaders
    train_generator = DefaultDataLoader(data['x_tr'], data['t_tr'], 100, rng=rng)
    validation_generator = DefaultDataLoader(data['x_va'], data['t_va'], 100)
    test_generator = DefaultDataLoader(data['x_te'], data['t_te'], 100)

    # Load model parameters for initialization
    init_model_file = 'mnist_pi_model_ternary_tanh.npz'
    if isfile(init_model_file):
        initial_parameters = dict(np.load(init_model_file))
        print 'Loading initial parameters from \'%s\'' % (init_model_file)
        print 'Parameters:', [e for e in initial_parameters]
    else:
        raise Exception('Cannot find initial model \'%s\'' % (init_model_file))

    # Create model
    global parameters
    layer, parameters = getMnistPIModel(initial_parameters, rng, srng)

    # Do optimization
    print layer.getMessage()
    cbErrVaDecreased = lambda: cbValidationErrorDecreased()
    global p_vals
    optimizeNetwork(layer,
                    loader_tr=train_generator,
                    loader_va=validation_generator,
                    loader_te=test_generator,
                    optimization_algorithm='adam',
                    step_size=1e-3,
                    step_size_discrete=1e-2,
                    step_size_scale_fn={'type': 'plateau',
                                        'monitor': 'ce_va',
                                        'cooldown': 50,
                                        'patience': 10,
                                        'factor': 0.5},
                    n_epochs=500,
                    do_bn_updates_after_epoch=True,
                    callback_validation_error_decreased=[(cbErrVaDecreased, [])])

    # Store model parameters. The model parameters of the best model according to the
    # validation error are now in p_vals.
    model_file = 'mnist_pi_model_ternary_sign_from_tanh.npz'
    print 'Optimization finished. Storing model parameters to \'%s\'' % model_file
    np.savez_compressed(model_file, **p_vals)
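# The snippet below is a minimal sketch (an assumption, not the project's actual code) of the
# pattern behind cbValidationErrorDecreased and the global p_vals used above: optimizeNetwork
# invokes the registered callback whenever the validation error reaches a new minimum, and the
# callback snapshots the current values of the shared parameters into p_vals so that the best
# model (rather than the last one) is what gets stored after optimization.
p_vals = {}

def cbValidationErrorDecreased():
    # 'parameters' is assumed to be the module-level list of named Theano shared variables
    # returned by getMnistPIModel() and exposed via the 'global parameters' statement above.
    global p_vals
    p_vals = dict((p.name, p.get_value(borrow=False)) for p in parameters)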
def main():
    dataset_file = 'mnist.npz'
    if not isfile(dataset_file):
        downloadMnist(dataset_file)

    data = dict(np.load(dataset_file))
    data['x_tr'] = data['x_tr'] * 2.0 - 1.0
    data['x_va'] = data['x_va'] * 2.0 - 1.0
    data['x_te'] = data['x_te'] * 2.0 - 1.0
    data['t_tr'] = data['t_tr'].astype(np.int32)
    data['t_va'] = data['t_va'].astype(np.int32)
    data['t_te'] = data['t_te'].astype(np.int32)

    rng = np.random.RandomState()
    srng = RandomStreamsGPU(rng.randint(1, 2147462579, size=(6,)))

    # Setup data loaders
    train_generator = DefaultDataLoader(data['x_tr'], data['t_tr'], 100, rng=rng)
    validation_generator = DefaultDataLoader(data['x_va'], data['t_va'], 100)
    test_generator = DefaultDataLoader(data['x_te'], data['t_te'], 100)

    # Create model
    global parameters
    layer, parameters = getMnistPIModel(rng, srng)

    # Do optimization
    print layer.getMessage()
    cbErrVaDecreased = lambda: cbValidationErrorDecreased()
    global p_vals
    optimizeNetwork(layer,
                    loader_tr=train_generator,
                    loader_va=validation_generator,
                    loader_te=test_generator,
                    optimization_algorithm='adam',
                    step_size=1e-3,
                    step_size_scale_fn={'type': 'plateau',
                                        'monitor': 'ce_va',
                                        'cooldown': 150,
                                        'patience': 25,
                                        'factor': 0.5},
                    n_epochs=1000,
                    callback_validation_error_decreased=[(cbErrVaDecreased, [])])

    # Store model parameters. The model parameters of the best model according to the
    # validation error are now in p_vals.
    model_file = 'mnist_pi_model_real.npz'
    print 'Optimization finished. Storing model parameters to \'%s\'' % model_file
    np.savez_compressed(model_file, **p_vals)
def main():
    dataset_file = 'cifar100.npz'
    if not isfile(dataset_file):
        downloadCifar100(dataset_file)

    data = dict(np.load(dataset_file))
    data['x_tr'] = ((data['x_tr'] / 255.0 * 2.0) - 1.0).astype(np.float32).reshape(-1, 3, 32, 32)
    data['x_va'] = ((data['x_va'] / 255.0 * 2.0) - 1.0).astype(np.float32).reshape(-1, 3, 32, 32)
    data['x_te'] = ((data['x_te'] / 255.0 * 2.0) - 1.0).astype(np.float32).reshape(-1, 3, 32, 32)
    data['t_tr'] = data['t_tr'].astype(np.int32)
    data['t_va'] = data['t_va'].astype(np.int32)
    data['t_te'] = data['t_te'].astype(np.int32)

    rng = np.random.RandomState()
    srng = RandomStreamsGPU(rng.randint(1, 2147462579, size=(6,)))

    # Setup data loaders
    train_generator = Cifar10FlipShiftDataLoader(data['x_tr'], data['t_tr'], 100,
                                                 flip_axis=1, max_shift=4,
                                                 requires_train=True, rng=rng)
    validation_generator = DefaultDataLoader(data['x_va'], data['t_va'], 100)
    test_generator = DefaultDataLoader(data['x_te'], data['t_te'], 100)

    # Create model
    global parameters
    layer, parameters = getCifar100Model(rng, srng)

    # Do optimization
    print layer.getMessage()
    cbErrVaDecreased = lambda: cbValidationErrorDecreased()
    global p_vals
    optimizeNetwork(layer,
                    loader_tr=train_generator,
                    loader_va=validation_generator,
                    loader_te=test_generator,
                    optimization_algorithm='adam',
                    step_size=3e-4,
                    step_size_scale_fn={'type': 'plateau',
                                        'monitor': 'ce_va',
                                        'cooldown': 50,
                                        'patience': 10,
                                        'factor': 0.5},
                    n_epochs=500,
                    callback_validation_error_decreased=[(cbErrVaDecreased, [])])

    # Store model parameters. The model parameters of the best model according to the
    # validation error are now in p_vals.
    model_file = 'cifar100_model_real.npz'
    print 'Optimization finished. Storing model parameters to \'%s\'' % model_file
    np.savez_compressed(model_file, **p_vals)
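# Minimal sketch of what a mini-batch loader such as DefaultDataLoader is assumed to provide
# (an illustration under assumptions, not the project's actual implementation): it wraps an
# input array and a target array, yields batches of the given size, and reshuffles the data
# when an rng is supplied, as done for the training loaders above.
class SimpleBatchLoader(object):
    def __init__(self, x, t, batch_size, rng=None):
        self.x, self.t, self.batch_size, self.rng = x, t, batch_size, rng

    def __iter__(self):
        n = self.x.shape[0]
        # Shuffle once per pass if an rng was given; otherwise keep the original order.
        idx = self.rng.permutation(n) if self.rng is not None else np.arange(n)
        for start in range(0, n, self.batch_size):
            batch = idx[start:start + self.batch_size]
            yield self.x[batch], self.t[batch]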
def SamplingdenseSDAEexp(state, channel):
    """
    This script launches an SDAE experiment, trained in a greedy layer-wise fashion.

    The hidden layer activation function is the rectifier (i.e. max(0, y)). The
    reconstruction activation function is the sigmoid, and the reconstruction cost is the
    cross-entropy. From one layer to the next we need to scale the parameters in order to
    ensure that the representation is in the interval [0, 1]. The noise of the input layer
    is salt-and-pepper noise ('binomial_NLP'); for deeper layers it is zero-masking noise
    (binomial).
    """
    SavePath = channel.remote_path + '/' if hasattr(channel, 'remote_path') else channel.path + '/'

    numpy.random.seed(state.seed)
    Wenc, benc = createWbshared(numpy.random, state.n_inp, state.n_hid, 'enc')
    Wdec, bdec = createWbshared(numpy.random, state.n_hid, state.n_inp, 'dec')

    # Load the entire training data
    full_train = vectosparsemat(data_path + state['path_data'], state['ninputs'])
    full_test = vectosparsemat(data_path + state['path_data_test'], state['ninputs'])
    full_train = full_train[numpy.random.permutation(full_train.shape[0]), :]
    NB_DENSE_train = int(numpy.ceil(full_train.shape[0] / float(state['dense_size'])))
    NB_DENSE_test = int(numpy.ceil(full_test.shape[0] / float(state['dense_size'])))

    # Create the dense batch shared variable
    train = theano.shared(createdensebatch(full_train, state['dense_size'], 0)[0])
    zer_mask_shared = theano.shared(
        numpy.asarray(numpy.random.binomial(n=1, p=1 - state.zeros, size=train.value.shape),
                      dtype=theano.config.floatX))
    one_mask_shared = theano.shared(
        numpy.asarray(numpy.random.binomial(n=1, p=state.ones, size=train.value.shape),
                      dtype=theano.config.floatX))

    # ------------------------------
    state.bestindomain = -1
    state.bestindomainstd = -1
    state.bestindomainval = -1
    state.bestindomainvalstd = -1
    state.bestindomainvalde = -1
    state.bestrec = -1
    state.bestrecde = -1
    state.bestonlinerec = -1
    state.bestonlinerecde = -1
    epochsl = []
    indomain = []
    indomainstd = []
    indomainval = []
    indomainvalstd = []
    rec = []
    recdense = []
    reconline = []
    # -------------------------------

    # Model initialization:
    inp = T.matrix()
    RandomStreams = RandomStreamsGPU(state.seed)
    zer_mask = T.matrix()
    one_mask = T.matrix()
    if state.pattern == 'inp':
        pattern = T.cast(
            (inp + RandomStreams.binomial(size=inp.shape, n=1, p=state.ratio,
                                          dtype=theano.config.floatX)) > 0,
            dtype=theano.config.floatX)
    elif state.pattern == 'noise':
        pattern = T.cast(
            ((1 - zer_mask) * inp + one_mask +
             RandomStreams.binomial(size=inp.shape, n=1, p=state.ratio,
                                    dtype=theano.config.floatX)) > 0,
            dtype=theano.config.floatX)
    elif state.pattern == 'inpnoise':
        pattern = T.cast(
            (inp + one_mask +
             RandomStreams.binomial(size=inp.shape, n=1, p=state.ratio,
                                    dtype=theano.config.floatX)) > 0,
            dtype=theano.config.floatX)
    elif state.pattern == 'random':
        pattern = RandomStreams.binomial(size=inp.shape, n=1, p=state.ratio,
                                         dtype=theano.config.floatX)
    elif state.pattern is None:
        pattern = None
    else:
        assert False

    inp_noise = binomial_NLP_noise(inp, zer_mask, one_mask)
    hid_lin = T.dot(inp_noise, Wenc) + benc
    if state.act == 'rect':
        hid_out = hid_lin * (hid_lin > 0)
    if state.act == 'sigmoid':
        hid_out = T.nnet.sigmoid(hid_lin)
    # Despite the name, this penalizes the squared hidden activations.
    L1_reg = T.mean(T.sum(hid_out * hid_out, axis=1), axis=0)
    rec_lin = T.dot(hid_out, Wdec) + bdec
    # the sigmoid is inside the cross_entropy function.
    if not hasattr(state, 'scaling'):
        state.scaling = False
    if state.cost == 'CE':
        cost, dum = cross_entropy_sampled_cost(inp, rec_lin, pattern, state.scaling)
        cost_dense, cost_decoupled_dense = cross_entropy_sampled_cost(inp, rec_lin, None)
    if state.cost == 'MSE':
        cost, dum = MSE_sampled_cost(inp, rec_lin, pattern, state.scaling)
        cost_dense, cost_decoupled_dense = MSE_sampled_cost(inp, rec_lin, None)
    if state.regcoef != 0.:
        cost = cost + state.regcoef * L1_reg

    grad = T.grad(cost, [Wenc, Wdec, benc, bdec])
    updates = dict((p, p - state.lr * g) for p, g in zip([Wenc, Wdec, benc, bdec], grad))

    givens = {}
    index = T.lscalar()
    givens.update({inp: train[index * state.batchsize:(index + 1) * state.batchsize]})
    givens.update({zer_mask: zer_mask_shared[index * state.batchsize:(index + 1) * state.batchsize]})
    givens.update({one_mask: one_mask_shared[index * state.batchsize:(index + 1) * state.batchsize]})
    TRAINFUNC = theano.function([index], cost, updates=updates, givens=givens)

    # givens = {}
    # givens.update({inp: train[index * state.batchsizeerr:(index + 1) * state.batchsizeerr]})
    # givens.update({zer_mask: zer_mask_shared[index * state.batchsizeerr:(index + 1) * state.batchsizeerr]})
    # givens.update({one_mask: one_mask_shared[index * state.batchsizeerr:(index + 1) * state.batchsizeerr]})
    # ERRNOISE = theano.function([index], [cost_dense, cost_decoupled_dense], givens=givens)

    givens = {}
    givens.update({inp: train[index * state.batchsizeerr:(index + 1) * state.batchsizeerr]})
    givens.update({zer_mask: zer_mask_shared[index * state.batchsizeerr:(index + 1) * state.batchsizeerr]})
    givens.update({one_mask: one_mask_shared[index * state.batchsizeerr:(index + 1) * state.batchsizeerr]})
    ERR = theano.function([index], [cost_dense, cost_decoupled_dense], givens=givens)

    # Train the current DAE
    for epoch in range(state['nepochs']):
        # Load sequentially dense batches of the training data
        reconstruction_error_batch = 0
        update_count1 = 0
        for batchnb in range(NB_DENSE_train):
            train.container.value[:], realsize = createdensebatch(full_train, state.dense_size, batchnb)
            zer_mask_shared.container.value[:] = numpy.asarray(
                numpy.random.binomial(n=1, p=1 - state.zeros, size=train.value.shape),
                dtype=theano.config.floatX)
            one_mask_shared.container.value[:] = numpy.asarray(
                numpy.random.binomial(n=1, p=state.ones, size=train.value.shape),
                dtype=theano.config.floatX)
            for j in range(realsize / state.batchsize):
                tmp = TRAINFUNC(j)
                reconstruction_error_batch += tmp
                update_count1 += 1
            print >> sys.stderr, "\t\tAt depth %d, epoch %d, finished training over batch %s" % (1, epoch + 1, batchnb + 1)
            print >> sys.stderr, "\t\tMean reconstruction error %s" % (reconstruction_error_batch / float(update_count1))
        print >> sys.stderr, '...finished training epoch #%s' % (epoch + 1)
        full_train = full_train[numpy.random.permutation(full_train.shape[0]), :]

        if epoch + 1 in state.epochs:
            # rec test err
            update_count2 = 0
            test_recerr = 0
            test_recerrd = numpy.zeros((state.ninputs,))
            for batchnb in range(NB_DENSE_test):
                train.container.value[:], realsize = createdensebatch(full_test, state.dense_size, batchnb)
                zer_mask_shared.container.value[:] = numpy.ones(train.value.shape, dtype=theano.config.floatX)
                one_mask_shared.container.value[:] = numpy.zeros(train.value.shape, dtype=theano.config.floatX)
                for j in range(realsize / state.batchsizeerr):
                    # Update function
                    recerr, recerrd = ERR(j)
                    test_recerr += recerr
                    test_recerrd += recerrd
                    update_count2 += 1

            if not os.path.isdir(SavePath):
                os.mkdir(SavePath)
            modeldir = os.path.join(SavePath, 'currentmodel')
            if not os.path.isdir(modeldir):
                os.mkdir(modeldir)
            f = open(modeldir + '/params.pkl', 'w')
            cPickle.dump(Wenc.value, f, -1)
            cPickle.dump(Wdec.value, f, -1)
            cPickle.dump(benc.value, f, -1)
            cPickle.dump(bdec.value, f, -1)
            f.close()

            createdatafiles(1, Wenc, benc, SavePath, state.small, state.ninputs, state.act)
            currentresults = validtest(1, SavePath, state.small, state.folds, state.ninputs)
            epochsl += [epoch + 1]
            indomainval += [currentresults[1][0]]
            indomainvalstd += [currentresults[1][1]]
            indomain += [currentresults[0][0]]
            indomainstd += [currentresults[0][1]]
            rec += [test_recerr / float(update_count2)]
            recdense += [test_recerrd / float(update_count2)]
            reconline += [reconstruction_error_batch / float(update_count1)]

            print '###### RESULTS :'
            print 'Depth:', 1
            print 'Epoch:', epoch + 1
            print 'Online Reconstruction:', reconstruction_error_batch / float(update_count1)
            print 'Reconstruction:', test_recerr / float(update_count2)
            print 'in-domain val:', currentresults[1][0], '+/-', currentresults[1][1]
            print 'in-domain test:', currentresults[0][0], '+/-', currentresults[0][1]
            print ' '

            f = open('results.pkl', 'w')
            cPickle.dump(epochsl, f, -1)
            cPickle.dump(rec, f, -1)
            cPickle.dump(recdense, f, -1)
            cPickle.dump(reconline, f, -1)
            cPickle.dump((indomainval, indomainvalstd), f, -1)
            cPickle.dump((indomain, indomainstd), f, -1)
            f.close()

            if test_recerr / float(update_count2) < state.bestrec or state.bestrec == -1:
                state.bestrec = test_recerr / float(update_count2)
                state.bestrecde = (1, epoch + 1)
            if reconstruction_error_batch / float(update_count1) < state.bestonlinerec or state.bestonlinerec == -1:
                state.bestonlinerec = reconstruction_error_batch / float(update_count1)
                state.bestonlinerecde = (1, epoch + 1)
            if currentresults[1][0] < state.bestindomainval or state.bestindomainval == -1:
                modeldir = os.path.join(SavePath, 'bestmodel')
                if not os.path.isdir(modeldir):
                    os.mkdir(modeldir)
                f = open(modeldir + '/params.pkl', 'w')
                cPickle.dump(Wenc.value, f, -1)
                cPickle.dump(Wdec.value, f, -1)
                cPickle.dump(benc.value, f, -1)
                cPickle.dump(bdec.value, f, -1)
                f.close()
                state.bestindomain = currentresults[0][0]
                state.bestindomainstd = currentresults[0][1]
                state.bestindomainval = currentresults[1][0]
                state.bestindomainvalstd = currentresults[1][1]
                state.bestindomainvalde = (1, epoch + 1)

            state.currentepoch = epoch + 1
            channel.save()

    return channel.COMPLETE
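# For reference, a minimal sketch of the dense reconstruction cost described in the docstring
# above: the sigmoid is applied inside the cost, and the reconstruction error is the
# cross-entropy against the uncorrupted input. This is an assumed equivalent of what
# cross_entropy_sampled_cost(inp, rec_lin, None) computes (a scalar cost plus a per-input-unit
# cost), not the project's actual helper.
def cross_entropy_dense_sketch(inp, rec_lin):
    rec = T.nnet.sigmoid(rec_lin)
    per_unit = -(inp * T.log(rec) + (1.0 - inp) * T.log(1.0 - rec))
    cost = T.mean(T.sum(per_unit, axis=1), axis=0)  # scalar: sum over units, mean over examples
    cost_per_input = T.mean(per_unit, axis=0)       # vector of length ninputs (cf. cost_decoupled_dense)
    return cost, cost_per_input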