Example #1
def check_stacked_autoencoder():
    """
    # Check the gradients for the stacked autoencoder
    #
    # In general, we recommend that the creation of such files for checking
    # gradients when you write new cost functions.
    #

    :return:
    """
    ## Setup random data / small model

    input_size = 64
    hidden_size_L1 = 36
    hidden_size_L2 = 25
    lambda_ = 0.01
    data = np.random.randn(input_size, 10)
    labels = np.random.randint(4, size=10)
    num_classes = 4

    stack = [dict() for i in range(2)]
    stack[0]['w'] = 0.1 * np.random.randn(hidden_size_L1, input_size)
    stack[0]['b'] = np.random.randn(hidden_size_L1)
    stack[1]['w'] = 0.1 * np.random.randn(hidden_size_L2, hidden_size_L1)
    stack[1]['b'] = np.random.randn(hidden_size_L2)
    softmax_theta = 0.005 * np.random.randn(hidden_size_L2 * num_classes)

    params, net_config = stacked_autoencoder.stack2params(stack)

    stacked_theta = np.concatenate((softmax_theta, params))

    cost, grad = stacked_autoencoder.stacked_autoencoder_cost(stacked_theta, input_size,
                                                              hidden_size_L2, num_classes,
                                                              net_config, lambda_, data, labels)

    # Check that the numerical and analytic gradients are the same
    J = lambda x: stacked_autoencoder.stacked_autoencoder_cost(x, input_size, hidden_size_L2,
                                                               num_classes, net_config, lambda_,
                                                               data, labels)
    num_grad = compute_gradient(J, stacked_theta)

    print(np.column_stack((num_grad, grad)))
    print("The above two columns you get should be very similar.\n"
          "(Left - Your Numerical Gradient, Right - Analytical Gradient)\n")

    diff = np.linalg.norm(num_grad - grad) / np.linalg.norm(num_grad + grad)
    print(diff)
    print("Norm of the difference between numerical and analytical gradients (should be < 1e-9)\n")
Example #2
# ======================================================================
# STEP 5: Finetune softmax model

# Implement stacked_autoencoder_cost so that it returns the combined cost (and gradient)
# of the whole model, then run this cell.


# Initialize the stack using the parameters learned by the two sparse autoencoders.
# Each sae*_opt_theta is packed as (W1, W2, b1, b2), so W1 occupies the first
# hidden*visible entries and b1 starts at offset 2*hidden*visible.
stack = [dict() for i in range(2)]
stack[0]['w'] = sae1_opt_theta[0:hidden_size_L1 * input_size].reshape(hidden_size_L1, input_size)
stack[0]['b'] = sae1_opt_theta[2 * hidden_size_L1 * input_size:2 * hidden_size_L1 * input_size + hidden_size_L1]
stack[1]['w'] = sae2_opt_theta[0:hidden_size_L1 * hidden_size_L2].reshape(hidden_size_L2, hidden_size_L1)
stack[1]['b'] = sae2_opt_theta[2 * hidden_size_L1 * hidden_size_L2:2 * hidden_size_L1 * hidden_size_L2 + hidden_size_L2]

# Initialize the parameters for the deep model
(stack_params, net_config) = stacked_autoencoder.stack2params(stack)

stacked_autoencoder_theta = np.concatenate((softmax_theta.flatten(), stack_params))
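
# Illustrative sketch (not the project's actual implementation): stack2params is
# assumed to flatten each layer's 'w' and 'b' into one parameter vector and to
# record the layer sizes in net_config so the cost function can unpack them
# again. The net_config format below is an assumption for illustration only.
def stack2params_sketch(stack):
    params = np.concatenate([np.concatenate((layer['w'].flatten(), layer['b'].flatten()))
                             for layer in stack])
    net_config = {'input_size': stack[0]['w'].shape[1],
                  'layer_sizes': [layer['w'].shape[0] for layer in stack]}
    return params, net_config

def params2stack_sketch(params, net_config):
    """Inverse of stack2params_sketch: rebuild per-layer 'w'/'b' from the flat vector."""
    stack, pos, prev_size = [], 0, net_config['input_size']
    for size in net_config['layer_sizes']:
        w = params[pos:pos + size * prev_size].reshape(size, prev_size)
        pos += size * prev_size
        b = params[pos:pos + size]
        pos += size
        stack.append({'w': w, 'b': b})
        prev_size = size
    return stack
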

J = lambda x: stacked_autoencoder.stacked_autoencoder_cost(x, input_size, hidden_size_L2,
                                                           num_classes, net_config, lambda_,
                                                           train_images, train_labels)

options_ = {'maxiter': 400, 'disp': True}
result = scipy.optimize.minimize(J, stacked_autoencoder_theta, method='L-BFGS-B', jac=True, options=options_)
stacked_autoencoder_opt_theta = result.x

print(result)

# ======================================================================
# STEP 6: Test
# Implement stacked_ae_cost so that it returns the combined cost (and gradient) of the whole model, then run this cell.

# Initialize the stack using the parameters learned

n_stack = 2 # Two layers
stack = [{} for i in range(n_stack)]

stack[0]['w'] = sae1_opt_theta[0:hidden_size_L1*input_size].reshape((hidden_size_L1, input_size))
stack[0]['b'] = sae1_opt_theta[2*hidden_size_L1*input_size: 2*hidden_size_L1*input_size + hidden_size_L1]

stack[1]['w'] = sae2_opt_theta[0:hidden_size_L2*hidden_size_L1].reshape((hidden_size_L2, hidden_size_L1))
stack[1]['b'] = sae2_opt_theta[2*hidden_size_L2*hidden_size_L1: 2*hidden_size_L2*hidden_size_L1 + hidden_size_L2]

# Initialize the parameters for the deep model
stack_params, net_config = stack2params(stack)
stacked_ae_theta = np.concatenate((softmax_opt_theta, stack_params))
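
# Optional sanity check (added sketch): the packed vector should hold the softmax
# weights (n_classes * hidden_size_L2 entries) followed by every layer's w and b.
assert stacked_ae_theta.size == n_classes * hidden_size_L2 + \
    sum(layer['w'].size + layer['b'].size for layer in stack)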

# Instructions: Train the deep network. The hidden size here refers to the
#               dimension of the input to the classifier, which corresponds
#               to "hidden_size_L2".

J = lambda theta: stacked_ae_cost(theta, input_size, hidden_size_L2, n_classes,
                                  net_config, lambda_, train_data, train_labels)

# check_stacked_ae_cost()  # Optionally verify the cost/gradient implementation before training

# Find out the optimal theta
options = {'maxiter': maxiter, 'disp': True}
results = scipy.optimize.minimize(J, stacked_ae_theta, method='L-BFGS-B', jac=True, options=options)
stacked_ae_opt_theta = results['x']
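
# The test step itself is not shown above. Under the usual setup (sigmoid hidden
# layers with a softmax classifier on top), prediction and accuracy could be
# computed as in the following sketch; stacked_ae_predict and test_data/test_labels
# are hypothetical names, not part of the original code.
def stacked_ae_predict(theta, hidden_size, n_classes, net_config, data):
    softmax_theta = theta[0:n_classes * hidden_size].reshape(n_classes, hidden_size)
    stack = params2stack_sketch(theta[n_classes * hidden_size:], net_config)  # sketch from above
    a = data
    for layer in stack:
        a = 1.0 / (1.0 + np.exp(-(layer['w'].dot(a) + layer['b'][:, np.newaxis])))
    return np.argmax(softmax_theta.dot(a), axis=0)

# predictions = stacked_ae_predict(stacked_ae_opt_theta, hidden_size_L2, n_classes,
#                                  net_config, test_data)
# print("Accuracy: {0:.2f}%".format(100 * np.mean(predictions == test_labels)))
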
# Train the softmax classifier on the L2 features (cached to disk)
if os.path.exists(saeSoftmaxOptThetaFilename):
	saeSoftmaxOptTheta = load(saeSoftmaxOptThetaFilename)
else:
	result = optimize.minimize(softmaxCostCallback, thetaParam, method='L-BFGS-B', jac=True, options=softmax_options)

	saeSoftmaxOptTheta = result.x[0:numClasses*hiddenSizeL2]

	save(saeSoftmaxOptThetaFilename, saeSoftmaxOptTheta)

# Finetune softmax model

stack = [stacked_autoencoder.Layer(1), stacked_autoencoder.Layer(2)]
stack[0].W = sae1OptTheta[0:hiddenSizeL1*inputSize].reshape(hiddenSizeL1, inputSize)
stack[0].b = sae1OptTheta[2*hiddenSizeL1*inputSize:2*hiddenSizeL1*inputSize+hiddenSizeL1]
stack[1].W = sae2OptTheta[0:hiddenSizeL2*hiddenSizeL1].reshape(hiddenSizeL2, hiddenSizeL1)
stack[1].b = sae2OptTheta[2*hiddenSizeL2*hiddenSizeL1:2*hiddenSizeL2*hiddenSizeL1+hiddenSizeL2]

(stackParams, netConfig) = stacked_autoencoder.stack2params(stack)
stackedAETheta = concatenate([saeSoftmaxOptTheta, stackParams])

saeOptThetaFilename = results_dir + 'saeOptTheta.npy'

if os.path.exists(saeOptThetaFilename):
	stackedAEOptTheta = load(saeOptThetaFilename)
else:
	def stackedAutoencoderCostCallback(x):
		return stacked_autoencoder.cost(x, inputSize, hiddenSizeL2, numClasses, netConfig,
				lambdaParam, trainData, trainLabels, corruptionLevel)

	result = optimize.minimize(stackedAutoencoderCostCallback, stackedAETheta, method='L-BFGS-B', jac=True, options=options)
	
	stackedAEOptTheta = result.x
	save(saeOptThetaFilename, stackedAEOptTheta)