# Example #1
def test_dA(learning_rate=0.1, training_epochs=20,
			dataset='mnist.pkl.gz',
			batch_size=20, output_folder='dA_data', mode='train', amount='full'):

	"""
	This demo is tested on MNIST

	:type learning_rate: float
	:param learning_rate: learning rate used for training the DeNosing
						  AutoEncoder

	:type training_epochs: int
	:param training_epochs: number of epochs used for training

	:type dataset: string
	:param dataset: path to the picked dataset

	"""
	datasets = load_data(dataset, mode, amount)
	train_set_x, train_set_y = datasets[0]

	# compute number of minibatches for training, validation and testing
	n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

	# allocate symbolic variables for the data
	index = T.lscalar()	# index to a [mini]batch
	x = T.matrix('x')  # the data is presented as rasterized images

	'''
	if not os.path.isdir(output_folder):
		os.makedirs(output_folder)
	os.chdir(output_folder)
	'''
	######################
	# BUILDING THE MODEL #
	######################

	for noize in [0, 10, 20, 30, 40, 50]:
		print 'noize:', str(noize), '%'

		rng = numpy.random.RandomState(123)
		theano_rng = RandomStreams(rng.randint(2 ** 30))

		da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
				n_visible=32 * 32, n_hidden=784) # same as MNIST (28*28=784)

		cost, updates = da.get_cost_updates(corruption_level=noize/100.,
											learning_rate=learning_rate)

		train_da = theano.function([index], cost, updates=updates,
			 givens={x: train_set_x[index * batch_size:
									(index + 1) * batch_size]})
		
		comp_data = da.get_comp_values()

		get_comp_data = theano.function([index], comp_data,
			 givens={x: train_set_x[index * batch_size:
									(index + 1) * batch_size]})

		start_time = time.clock()

		############
		# TRAINING #
		############

		# go through training epochs
		for epoch in xrange(training_epochs):
			# go through trainng set
			c = []
			for batch_index in xrange(n_train_batches):
				c.append(train_da(batch_index))

			print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

		end_time = time.clock()

		training_time = (end_time - start_time)

		print >> sys.stderr, ('The ' + str(noize)  +'% corruption code for file ' +
							  os.path.split(__file__)[1] +
							  ' ran for %.2fm' % ((training_time) / 60.))

		## save parameters
		get_params = theano.function(inputs=[], outputs=[da.W, da.b, da.b_prime])
		save_parameters(get_params(), 'dA_' + str(noize))

		## save compressed data (no corruption)
		data_da_0 = [[0 for j in xrange(28*28)] for i in xrange(n_train_batches*batch_size)]
		
		for batch_index in xrange(n_train_batches):
			comp_x = get_comp_data(batch_index)
		
			for i in xrange(batch_size):
				comp_x[i] = numpy.asarray(comp_x[i], dtype=numpy.float64)
				data_da_0[batch_index * batch_size + i] = comp_x[i]
		
		data_da_0 = numpy.asarray(data_da_0)
		pickle(data_da_0, 'dA_data/' + mode + '_data_da_' + str(noize) + '.pkl')
def evaluate_lenet5(learning_rate=0.1, learning_rate2=0.05, learning_rate3=0.01, n_epochs=200,
					dataset='cifar-10-batches-py',
					nkerns=[6, 16], batch_size=20, mode='train', amount='full'): # nkerns coule be ok with [10, 50]
	""" Demonstrates lenet on MNIST dataset

	:type learning_rate: float
	:param learning_rate: learning rate used (factor for the stochastic
						  gradient)

	:type n_epochs: int
	:param n_epochs: maximal number of epochs to run the optimizer

	:type dataset: string
	:param dataset: path to the dataset used for training /testing (MNIST here)

	:type nkerns: list of ints
	:param nkerns: number of kernels on each layer
	"""

	#learning_rate = theano.shared(value=learning_rate, borrow=True)

	rng = numpy.random.RandomState(23455)

	datasets = load_data(dataset, mode=mode, amount=amount)

	if mode == 'train':
		train_set_x, train_set_y = datasets[0]
		valid_set_x, valid_set_y = datasets[1]
	else:
		test_set_x, test_set_y = datasets[0]

	# compute number of minibatches for training, validation and testing
	if mode == 'train':
		n_train_batches = train_set_x.get_value(borrow=True).shape[0]
		n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
		n_train_batches /= batch_size
		n_valid_batches /= batch_size
	else:
		n_test_batches = test_set_x.get_value(borrow=True).shape[0]
		n_test_batches /= batch_size

	# allocate symbolic variables for the data
	index = T.lscalar()  # index to a [mini]batch
	x = T.matrix('x')   # the data is presented as rasterized images
	y = T.ivector('y')  # the labels are presented as 1D vector of
						# [int] labels

	ishape = (32, 32)  # this is the size of CIFIA-10 images (gray-scaled)

	######################
	# BUILD ACTUAL MODEL #
	######################
	print '... building the model'

	# Reshape matrix of rasterized images of shape (batch_size,32*32)
	# to a 4D tensor, compatible with our LeNetConvPoolLayer
	layer0_input = x.reshape((batch_size, 1, 32, 32))

	# Construct the first convolutional pooling layer:
	# filtering reduces the image size to (32-5+1,32-5+1)=(28,28)
	# maxpooling reduces this further to (28/2,28/2) = (14,14)
	# 4D output tensor is thus of shape (batch_size,nkerns[0],14,14)
	layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
			image_shape=(batch_size, 1, 32, 32),
			filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))

	# Construct the second convolutional pooling layer
	# filtering reduces the image size to (14-5+1,14-5+1)=(10,10)
	# maxpooling reduces this further to (10/2,10/2) = (5,5)
	# 4D output tensor is thus of shape (nkerns[0],nkerns[1],5,5)
	layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
			image_shape=(batch_size, nkerns[0], 14, 14),
			filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))

	# the HiddenLayer being fully-connected, it operates on 2D matrices of
	# shape (batch_size,num_pixels) (i.e matrix of rasterized images).
	# This will generate a matrix of shape (20,50*5*5) = (20,1250) <-??
	layer2_input = layer1.output.flatten(2)

	# construct a fully-connected sigmoidal layer
	layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 5 * 5,
						 n_out=500, activation=T.tanh)

	# classify the values of the fully-connected sigmoidal layer
	layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

	## load the saved parameters
	if mode == 'test':
		learned_params = unpickle('params/convolutional_mlp_gray.pkl')

	# the cost we minimize during training is the NLL of the model
	cost = layer3.negative_log_likelihood(y)

	# create a function to compute the mistakes that are made by the model
	if mode == 'test':
		test_model = theano.function([index], layer3.errors(y),
				givens={
					x: test_set_x[index * batch_size: (index + 1) * batch_size],
					y: test_set_y[index * batch_size: (index + 1) * batch_size]})
	else:
		validate_model = theano.function([index], layer3.errors(y),
				givens={
					x: valid_set_x[index * batch_size: (index + 1) * batch_size],
					y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

		check_label = theano.function(inputs=[index],
				outputs=layer3.y_pair(y),
					givens={
						x: train_set_x[index * batch_size: (index + 1) * batch_size],
						y: train_set_y[index * batch_size: (index + 1) * batch_size]})

	# create a function to get the labels predicted by the model
	if mode == 'test':
		get_test_labels = theano.function([index], layer3.y_pred,
				givens={
					x: test_set_x[index * batch_size: (index + 1) * batch_size],
					layer0.W: learned_params[0],
					layer0.b: learned_params[1],
					layer1.W: learned_params[2],
					layer1.b: learned_params[3],
					layer2.W: learned_params[4],
					layer2.b: learned_params[5],
					layer3.W: learned_params[6],
					layer3.b: learned_params[7]})


	if mode == 'train':
		# create a list of all model parameters to be fit by gradient descent
		params = layer3.params + layer2.params + layer1.params + layer0.params
	
		# create a list of gradients for all model parameters
		grads = T.grad(cost, params)

	# train_model is a function that updates the model parameters by
	# SGD Since this model has many parameters, it would be tedious to
	# manually create an update rule for each model parameter. We thus
	# create the updates list by automatically looping over all
	# (params[i],grads[i]) pairs.
	if mode == 'train':
		updates = []
		for param_i, grad_i in zip(params, grads):
			updates.append((param_i, param_i - learning_rate * grad_i))

		updates2 = []
		for param_i, grad_i in zip(params, grads):
			updates2.append((param_i, param_i - learning_rate2 * grad_i))

		updates3 = []
		for param_i, grad_i in zip(params, grads):
			updates3.append((param_i, param_i - learning_rate3 * grad_i))

	if mode == 'train':
		train_model = theano.function([index], cost, updates=updates,
			  givens={
				x: train_set_x[index * batch_size: (index + 1) * batch_size],
				y: train_set_y[index * batch_size: (index + 1) * batch_size]})
		
		train_model2 = theano.function([index], cost, updates=updates2,
			  givens={
				x: train_set_x[index * batch_size: (index + 1) * batch_size],
				y: train_set_y[index * batch_size: (index + 1) * batch_size]})

		train_model3 = theano.function([index], cost, updates=updates3,
			  givens={
				x: train_set_x[index * batch_size: (index + 1) * batch_size],
				y: train_set_y[index * batch_size: (index + 1) * batch_size]})

	###############
	# TRAIN MODEL #
	###############
	print '... training the model'
	# early-stopping parameters
	if mode == 'train':
		patience = 10000  # look as this many examples regardless
		patience_increase = 2  # wait this much longer when a new best is
							   # found
		improvement_threshold = 0.999  # a relative improvement of this much is
									   # considered significant
		validation_frequency = min(n_train_batches, patience / 2)
								  # go through this many
								  # minibatche before checking the network
								  # on the validation set; in this case we
								  # check every epoch

	start_time = time.clock()

	if mode == 'train':
		best_params = None
		best_validation_loss = numpy.inf
		best_iter = 0
		test_score = 0.
		done_looping = False
	else:
		done_looping = True

	epoch = 0

	while (epoch < n_epochs) and (not done_looping):
		epoch = epoch + 1
		for minibatch_index in xrange(n_train_batches):

			iter = (epoch - 1) * n_train_batches + minibatch_index

			if iter % 100 == 0:
				print 'training @ iter = ', iter

			if epoch == 1:
				cost_ij = train_model(minibatch_index)
			elif this_validation_loss < 0.45 and this_validation_loss > 0.35:
				cost_ij = train_model2(minibatch_index)
			elif this_validation_loss < 0.35:
				cost_ij = train_model3(minibatch_index)
			else:
				cost_ij = train_model(minibatch_index)

			## check the contents of predictions occasionaly
			'''
			if iter % 100 == 0:
				[prediction, true_label] = check_label(minibatch_index)
				print 'prediction:'
				print prediction
				print 'true_label:'
				print true_label
			'''

			## save the parameters
			if mode == 'train':
				get_params = theano.function(inputs=[], outputs=[layer0.W, layer0.b, layer1.W, layer1.b, layer2.W, layer2.b, layer3.W, layer3.b])
				save_parameters(get_params(), 'convolutional_mlp_gray')


			if (iter + 1) % validation_frequency == 0:

				# compute zero-one loss on validation set
				validation_losses = [validate_model(i) for i
									 in xrange(n_valid_batches)]
				this_validation_loss = numpy.mean(validation_losses)
				print('epoch %i, minibatch %i/%i, validation error %f %%' % \
					  (epoch, minibatch_index + 1, n_train_batches, \
					   this_validation_loss * 100.))

				# if we got the best validation score until now
				if this_validation_loss < best_validation_loss:

					#improve patience if loss improvement is good enough
					if this_validation_loss < best_validation_loss *  \
					   improvement_threshold:
						patience = max(patience, iter * patience_increase)

					# save best validation score and iteration number
					best_validation_loss = this_validation_loss
					best_iter = iter

					'''
					# test it on the test set
					test_losses = [test_model(i) for i in xrange(n_test_batches)]
					test_score = numpy.mean(test_losses)
					print(('	 epoch %i, minibatch %i/%i, test error of best '
						   'model %f %%') %
						  (epoch, minibatch_index + 1, n_train_batches,
						   test_score * 100.))
					'''


			'''
			if patience <= iter:
				done_looping = True
				break
			'''


	if mode == 'test':
		print 'predicting the labels...'
		pred_labels = [[0 for j in xrange(batch_size)] for i in xrange(n_test_batches)]
		for i in xrange(n_test_batches):
			print str(i+1), '/', str(n_test_batches)
			pred_labels[i] = get_test_labels(i)

		writer = csv.writer(file('result/convolutional_mlp_gray.csv', 'w'))
		row = 1

		print 'output test labels...'
		for i in xrange(len(pred_labels)): # TBF: hard code
			print str(i+1), '/', str(len(pred_labels))
			for j in xrange(len(pred_labels[i])):
				writer.writerow([row, pred_labels[i][j]])
				row += 1


	end_time = time.clock()
	if mode == 'train':
		print('Optimization complete.')
		print('Best validation score of %f %% obtained at iteration %i,'\
			  'with test performance %f %%' %
			  (best_validation_loss * 100., best_iter + 1, test_score * 100.))
	print >> sys.stderr, ('The code for file ' +
						  os.path.split(__file__)[1] +
						  ' ran for %.2fm' % ((end_time - start_time) / 60.))
# Example #3
def test_SdA(finetune_lr=0.1, pretraining_epochs=20, ## originally 15
			 pretrain_lr=0.001, training_epochs=1000,
			 dataset='cifar-10-batches-py', batch_size=1, mode='train', amount='full'):
	"""
	Demonstrates how to train and test a stochastic denoising autoencoder.

	This is demonstrated on MNIST.

	:type learning_rate: float
	:param learning_rate: learning rate used in the finetune stage
	(factor for the stochastic gradient)

	:type pretraining_epochs: int
	:param pretraining_epochs: number of epoch to do pretraining

	:type pretrain_lr: float
	:param pretrain_lr: learning rate to be used during pre-training

	:type n_iter: int
	:param n_iter: maximal number of iterations ot run the optimizer

	:type dataset: string
	:param dataset: path the the pickled dataset

	"""

	datasets = load_data(dataset, mode=mode, amount=amount)

	if mode == 'train':
		train_set_x, train_set_y = datasets[0]
		valid_set_x, valid_set_y = datasets[1]
	else:
		test_set_x, test_set_y = datasets[0]

	# compute number of minibatches for training, validation and testing
	if mode == 'train':
		n_train_batches = train_set_x.get_value(borrow=True).shape[0]
		n_train_batches /= batch_size
	else:
		n_test_batches = test_set_x.get_value(borrow=True).shape[0]
		n_test_batches /= batch_size

	# numpy random generator
	numpy_rng = numpy.random.RandomState(89677)


	# allocate symbolic variables for the data
	index = T.lscalar()  # index to a [mini]batch
	x = T.matrix('x')   # the data is presented as rasterized images
	y = T.ivector('y')  # the labels are presented as 1D vector of

	print '... building the model'
	# construct the stacked denoising autoencoder class
	sda = SdA(numpy_rng=numpy_rng, n_ins=32 * 32,
			  hidden_layers_sizes=[1300, 1300, 1300],
			  n_outs=10)

	## load the saved parameters
	if mode == 'test':
		learned_params = unpickle('params/SdA.pkl')


	print '... getting the pretraining functions'
	if mode == 'train':
		pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
													batch_size=batch_size)


	#########################
	# PRETRAINING THE MODEL #
	#########################

	if mode == 'train':
		print '... pre-training the model'
		start_time = time.clock()
		## Pre-train layer-wise
		corruption_levels = [.1, .2, .3]
		for i in xrange(sda.n_layers):
			# go through pretraining epochs
			for epoch in xrange(pretraining_epochs):
				# go through the training set
				c = []
				for batch_index in xrange(n_train_batches):
					c.append(pretraining_fns[i](index=batch_index,
							 corruption=corruption_levels[i],
							 lr=pretrain_lr))
				print 'Pre-training layer %i, epoch %d / %d, cost ' % (i, epoch + 1, pretraining_epochs),
				print numpy.mean(c)

		end_time = time.clock()

		print >> sys.stderr, ('The pretraining code for file ' +
							  os.path.split(__file__)[1] +
							  ' ran for %.2fm' % ((end_time - start_time) / 60.))

	########################
	# FINETUNING THE MODEL #
	########################

	# get the training, validation and testing function for the model
	print '... getting the finetuning functions'
	if mode == 'train':
		train_fn, validate_model = sda.build_finetune_functions(
					datasets=datasets, batch_size=batch_size,
					learning_rate=finetune_lr)

	print '... finetunning the model'
	# early-stopping parameters
	if mode == 'train':
		patience = 10 * n_train_batches  # look as this many examples regardless
		patience_increase = 2.  # wait this much longer when a new best is
								# found
		improvement_threshold = 0.995  # a relative improvement of this much is
									   # considered significant
		validation_frequency = min(n_train_batches, patience / 2)
									  # go through this many
									  # minibatche before checking the network
									  # on the validation set; in this case we
									  # check every epoch


	# create a function to get the labels predicted by the model
	if mode == 'test':
		get_test_labels = theano.function([index], sda.logLayer.y_pred,
				givens={
					x: test_set_x[index * batch_size: (index + 1) * batch_size],
					sda.sigmoid_layers[0].W: learned_params[0],
					sda.sigmoid_layers[0].b: learned_params[1],
					sda.sigmoid_layers[1].W: learned_params[2],
					sda.sigmoid_layers[1].b: learned_params[3],
					sda.sigmoid_layers[2].W: learned_params[4],
					sda.sigmoid_layers[2].b: learned_params[5],
					sda.logLayer.W: learned_params[6],
					sda.logLayer.b: learned_params[7]})


	best_params = None
	best_validation_loss = numpy.inf
	test_score = 0.
	start_time = time.clock()

	if mode == 'train':
		done_looping = False
	else:
		done_looping = True

	epoch = 0

	while (epoch < training_epochs) and (not done_looping):
		epoch = epoch + 1
		for minibatch_index in xrange(n_train_batches):
			minibatch_avg_cost = train_fn(minibatch_index)

			## save the parameters
			if mode == 'train':
				get_params = theano.function(inputs=[],
						outputs=[sda.sigmoid_layers[0].W, sda.sigmoid_layers[0].b,
							sda.sigmoid_layers[1].W, sda.sigmoid_layers[1].b,
							sda.sigmoid_layers[2].W, sda.sigmoid_layers[2].b,
							sda.logLayer.W, sda.logLayer.b])


				save_parameters(get_params(), 'SdA')

			iter = (epoch - 1) * n_train_batches + minibatch_index

			if (iter + 1) % validation_frequency == 0:
				validation_losses = validate_model()
				this_validation_loss = numpy.mean(validation_losses)
				print('epoch %i, minibatch %i/%i, validation error %f %%' %
					  (epoch, minibatch_index + 1, n_train_batches,
					   this_validation_loss * 100.))

				# if we got the best validation score until now
				if this_validation_loss < best_validation_loss:

					#improve patience if loss improvement is good enough
					if (this_validation_loss < best_validation_loss *
						improvement_threshold):
						patience = max(patience, iter * patience_increase)

					# save best validation score and iteration number
					best_validation_loss = this_validation_loss
					best_iter = iter


			if patience <= iter:
				done_looping = True
				break


	if mode == 'test':
		print 'predicting the labels...'
		pred_labels = [[0 for j in xrange(batch_size)] for i in xrange(n_test_batches)]
		for i in xrange(n_test_batches):
			print str(i+1), '/', str(n_test_batches)
			pred_labels[i] = get_test_labels(i)

		writer = csv.writer(file('result/SdA.csv', 'w'))
		row = 1

		print 'output test labels...'
		for i in xrange(len(pred_labels)):
			print str(i+1), '/', str(len(pred_labels))
			for j in xrange(len(pred_labels[i])):
				writer.writerow([row, pred_labels[i][j]])
				row += 1


	end_time = time.clock()
	print(('Optimization complete with best validation score of %f %%,'
		   'with test performance %f %%') %
				 (best_validation_loss * 100., test_score * 100.))
	print >> sys.stderr, ('The training code for file ' +
						  os.path.split(__file__)[1] +
						  ' ran for %.2fm' % ((end_time - start_time) / 60.))