import numpy as np
from scipy.optimize import minimize

from initial_params import initial_params
from sparse_autoencoder_cost import sparse_autoencoder_cost, sigmoid, der_sigmoid
from load_mnist import generate_patch, load_data
from display_network import display_network


# Training script: fit the sparse autoencoder parameters with L-BFGS-B and
# display the learned first-layer weights.

# Layer sizes. 28 * 28 presumably matches the MNIST image size -- confirm
# against load_mnist.
visible_size = 28 * 28
hidden_size = 196

# Hyper-parameters forwarded unchanged to sparse_autoencoder_cost.
# ("lamda" is spelled this way because "lambda" is a reserved word.)
sparsity_param = 0.1
lamda = 0.003
beta = 3

# Transpose the loaded data and keep its first 10000 columns.
images = np.transpose(load_data())[:, 0:10000]

# NOTE(review): `patches` is not read anywhere in the visible script. The call
# is kept in case generate_patch() has side effects (e.g. consuming random
# state before initial_params runs) -- confirm before removing.
patches = generate_patch()
theta = initial_params(visible_size, hidden_size)


def J(th):
    """Evaluate the sparse autoencoder objective at parameter vector ``th``.

    Returns whatever ``sparse_autoencoder_cost`` returns. Because the
    optimizer below is invoked with ``jac=True``, that must be a
    ``(cost, gradient)`` pair.
    """
    # sigmoid / der_sigmoid are passed directly; wrapping them in
    # ``lambda x: f(x)`` adds nothing.
    return sparse_autoencoder_cost(
        visible_size, hidden_size, th, sigmoid, der_sigmoid, lamda, beta, sparsity_param, images
    )


options_ = {"maxiter": 800, "disp": True}
result = minimize(J, theta, method="L-BFGS-B", jac=True, options=options_)
opt_theta = result.x

# Parenthesised single-argument form prints the same thing on Python 2 and 3.
print(result)

# The first hidden_size * visible_size entries of the optimum are reshaped to
# (hidden_size, visible_size) and transposed, giving a
# (visible_size, hidden_size) array for display.
W1 = opt_theta[: hidden_size * visible_size].reshape(hidden_size, visible_size).transpose()
display_network(W1)
from sparse_autoencoder_cost import sparse_autoencoder_cost, sigmoid, der_sigmoid
from load_mnist import generate_patch, load_data

# Gradient-check script: evaluate the cost function once at the initial
# parameters and report the first entry of the gradient it returns.

# Layer sizes. 28 * 28 presumably matches the MNIST image size -- confirm
# against load_mnist.
visible_size = 28 * 28
hidden_size = 196

# Hyper-parameters forwarded unchanged to sparse_autoencoder_cost.
sparsity_param = 0.1
lamda = 0.003
beta = 3

# Transpose the loaded data and keep its first 10000 columns.
images = np.transpose(load_data())[:, 0:10000]

theta = initial_params(visible_size, hidden_size)

# NOTE(review): `x` is not read by any visible line below. The call is kept in
# case generate_patch() has side effects -- confirm before removing.
x = generate_patch()

# sparse_autoencoder_cost returns a (cost, gradient) pair. sigmoid and
# der_sigmoid are passed directly instead of through pass-through lambdas.
cost, der = sparse_autoencoder_cost(
    visible_size, hidden_size, theta, sigmoid, der_sigmoid, lamda, beta, sparsity_param, images
)

# Version-independent equivalent of the Python 2 statement
# `print 'real der value : ' , der[0]`: label, one separating space, value.
print('%s %s' % ('real der value : ', der[0]))






# Numerical (finite-difference) side of the gradient check: the cost function
# is re-evaluated with theta shifted by +epsilon along a single coordinate.
# The matching -epsilon evaluation follows; the two are presumably combined
# into a central-difference estimate further down -- confirm.
epsilon = 1e-5

# Column vector with a single 1 in the first row, selecting which parameter
# to perturb.
# NOTE(review): assumes theta has shape (n, 1). If initial_params returns a
# 1-D array, `theta + tmp * epsilon` broadcasts to (n, n) -- TODO confirm.
tmp = np.zeros((theta.shape[0], 1))
tmp[0, 0] = 1

# NOTE(review): earlier in this script the same call is unpacked as
# (cost, der), so cost1 presumably holds the whole pair, not just the scalar
# cost -- verify how it is consumed below.
cost1 = sparse_autoencoder_cost(visible_size, hidden_size, theta + tmp * epsilon,
                                sigmoid, der_sigmoid, lamda, beta, sparsity_param, images)
cost2 = sparse_autoencoder_cost(visible_size, hidden_size, theta - tmp*epsilon,