import numpy as np
import scipy.io as sio

from common_functions import add_zero_feature, sigmoid, sigmoid_gradient


def cf_nn(nn_params, input_layer_size, hidden_layer_size, num_labels, X, Y, lambda_coef):
    # Unroll the flat parameter vector into the two weight matrices.
    Theta1 = nn_params[0, :hidden_layer_size * (input_layer_size + 1)].reshape(
        (hidden_layer_size, (input_layer_size + 1)))
    Theta2 = nn_params[0, hidden_layer_size * (input_layer_size + 1):].reshape(
        (num_labels, (hidden_layer_size + 1)))
    m = Y.shape[1]
    Y = Y.A

    # Forward propagation; X already carries the bias column.
    A_1 = X
    Z_2 = Theta1 * A_1.T
    A_2 = sigmoid(Z_2)
    A_2 = add_zero_feature(A_2, axis=0)
    Z_3 = Theta2 * A_2
    A_3 = sigmoid(Z_3)
    H = A_3.A

    # Cross-entropy cost plus the L2 penalty; bias columns are not regularized.
    J = np.sum(-Y * np.log(H) - (1 - Y) * np.log(1 - H)) / m
    reg_J = 0.0
    reg_J += np.sum(np.power(Theta1, 2)[:, 1:])
    reg_J += np.sum(np.power(Theta2, 2)[:, 1:])
    J += reg_J * (float(lambda_coef) / (2 * m))
    return J
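# For reference, cf_nn implements the regularized cross-entropy cost from the
# course notes, with the bias columns excluded from the penalty:
#
#   J(\Theta) = \frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{K}
#       \left[ -y_k^{(i)} \log h_k^{(i)} - (1 - y_k^{(i)}) \log(1 - h_k^{(i)}) \right]
#       + \frac{\lambda}{2m} \Big( \sum (\Theta_1[:, 1{:}])^2 + \sum (\Theta_2[:, 1{:}])^2 \Big)
#
# where h^{(i)} = A_3[:, i] is the network output for example i.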
def gf_nn(nn_params, input_layer_size, hidden_layer_size, num_labels, X, Y, lambda_coef):
    # Unroll the flat parameter vector into the two weight matrices.
    Theta1 = nn_params[0, :hidden_layer_size * (input_layer_size + 1)].reshape(
        (hidden_layer_size, (input_layer_size + 1)))
    Theta2 = nn_params[0, hidden_layer_size * (input_layer_size + 1):].reshape(
        (num_labels, (hidden_layer_size + 1)))
    m = Y.shape[1]

    # Forward propagation, identical to cf_nn.
    A_1 = X
    Z_2 = Theta1 * A_1.T
    A_2 = sigmoid(Z_2)
    A_2 = add_zero_feature(A_2, axis=0)
    Z_3 = Theta2 * A_2
    A_3 = sigmoid(Z_3)

    # Backpropagation: output error, then hidden-layer error (bias row dropped).
    DELTA_3 = A_3 - Y
    DELTA_2 = np.multiply((Theta2.T * DELTA_3)[1:, :], sigmoid_gradient(Z_2))
    Theta1_grad = (DELTA_2 * A_1) / m
    Theta2_grad = (DELTA_3 * A_2.T) / m

    # Regularize everything except the bias columns.
    lambda_coef = float(lambda_coef)
    Theta1_grad[:, 1:] += (lambda_coef / m) * Theta1[:, 1:]
    Theta2_grad[:, 1:] += (lambda_coef / m) * Theta2[:, 1:]

    # Return the gradient unrolled into a flat vector, matching nn_params.
    return np.concatenate((Theta1_grad.A1, Theta2_grad.A1))
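# The helpers imported from common_functions are not shown in this excerpt.
# Below is a minimal sketch of what they are assumed to look like, inferred
# from how they are used above (hypothetical definitions, not the actual
# module):

def sigmoid(z):
    # Element-wise logistic function.
    return 1.0 / (1.0 + np.exp(-z))


def sigmoid_gradient(z):
    # Element-wise derivative of the sigmoid.
    s = sigmoid(z)
    return np.multiply(s, 1 - s)


def add_zero_feature(X, axis=1):
    # Despite the name, this prepends ONES (the bias/intercept feature):
    # a leading column for axis=1, a leading row for axis=0.
    if axis == 1:
        return np.hstack((np.ones((X.shape[0], 1)), X))
    return np.vstack((np.ones((1, X.shape[1])), X))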
import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import pinv

from common_functions import load_data, J_liner_regression, add_zero_feature, \
    gradient_descent, matrix_args, feature_normalize

if __name__ == '__main__':
    X, y = load_data('ex1data2.txt')
    mu, sigma, X = feature_normalize(X)
    X = add_zero_feature(X)

    # Run gradient descent with two learning rates and compare convergence.
    iterations = 400
    alphas = [0.01, 0.1]
    f, axarr = plt.subplots(len(alphas), sharex=True)
    plt.xlabel('Number of Iterations')
    plt.ylabel('Cost J')
    for i, alpha in enumerate(alphas):
        theta = np.zeros((X.shape[1], 1))
        theta, J_history = gradient_descent(J_liner_regression, X, y, iterations, theta, alpha)
        axarr[i].set_title('Alpha = {}'.format(alpha))
        axarr[i].plot(range(len(J_history)), J_history)
    plt.show()

    # Estimate the price of a 1650 sq-ft, 3 br house. Recall that the first
    # column of X is all-ones, so it does not need to be normalized.
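    # A hedged sketch of the estimate the comment above asks for, assuming mu
    # and sigma are the per-column mean and standard deviation returned by
    # feature_normalize:
    x = np.array([1650.0, 3.0])
    x = (x - np.asarray(mu).ravel()) / np.asarray(sigma).ravel()
    x = np.concatenate(([1.0], x))  # prepend the un-normalized bias feature
    print('Predicted price of a 1650 sq-ft, 3 br house: {}'.format(float(np.dot(x, theta))))

    # For comparison, the closed-form normal equation (pinv is imported above)
    # fits the raw, un-normalized data directly.
    X_raw, y_raw = load_data('ex1data2.txt')
    X_raw = add_zero_feature(X_raw)
    theta_ne = np.dot(pinv(np.dot(X_raw.T, X_raw)), np.dot(X_raw.T, y_raw))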
n_samples = 100
samples = np.random.choice(len(X), n_samples)

# Display a 10 x 10 grid of randomly chosen training digits.
fig = plt.figure(figsize=(8, 8))  # figure size in inches
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
for i, j in enumerate(samples):
    ax = fig.add_subplot(10, 10, i + 1, xticks=[], yticks=[])
    ax.imshow(X[j, :].reshape(20, 20).T, cmap=plt.cm.binary, interpolation='nearest')
    ax.text(0, 7, str(y[j, 0]))
plt.show()

num_labels = 10

X = add_zero_feature(X)
m, n = X.shape
initial_theta = np.ones((n, 1))

# One-vs-all: train one binary logistic-regression classifier per digit.
all_theta = np.vstack([minimize(cost_function, initial_theta, method='BFGS',
                                jac=grad_function, options={'disp': True, 'maxiter': 100},
                                args=(X, (y == i).astype(int))).x
                       for i in range(num_labels)])
y_pred = np.argmax(np.dot(X, all_theta.T), axis=1)
print('Training Set Accuracy: {}'.format(np.mean(y_pred == y.ravel()) * 100))

# Use regularization
lambda_coef = 0.1
all_theta = np.vstack([minimize(cost_function_reg, initial_theta, method='BFGS',
                                jac=grad_function_reg, options={'disp': True, 'maxiter': 100},
                                args=(X, (y == i).astype(int), lambda_coef)).x
                       for i in range(num_labels)])
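# Hedged follow-up (not in the original snippet): score the regularized
# classifiers the same way as the unregularized ones above.
y_pred = np.argmax(np.dot(X, all_theta.T), axis=1)
print('Training Set Accuracy (lambda = {}): {}'.format(
    lambda_coef, np.mean(y_pred == y.ravel()) * 100))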
Xval = data['Xval']
yval = data['yval']
Xtest = data['Xtest']
ytest = data['ytest']
m = len(X)

def plot_data():
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.plot(X, y, 'rx')

plot_data()
plt.show()

X_extended = add_zero_feature(X)
theta = np.array([1, 1])
print('J = {}, gradient = {}'.format(cost_function(theta, X_extended, y, 1),
                                     grad_function(theta, X_extended, y, 1)))

# Fit regularized linear regression and plot the fit over the data.
theta = train_linear_regression(X_extended, y, 1)
plot_data()
plt.plot(X, np.dot(X_extended, theta[:, np.newaxis]).ravel())
plt.show()

# Learning curves: training vs. cross-validation error as m grows.
lambda_coef = 0
error_train, error_val = learning_curve(X_extended, y, add_zero_feature(Xval), yval, lambda_coef)
plt.plot(range(1, m + 1), error_train, label='Train')
plt.plot(range(1, m + 1), error_val, c='r', label='Cross validation')
plt.legend()  # render the 'Train' / 'Cross validation' labels set above
plt.show()
def map_feature(X1, X2, degree=6):
    # Map two input features to all polynomial terms x1^(i-j) * x2^j
    # up to the given degree, plus the leading column of ones.
    return add_zero_feature(np.hstack([X1 ** (i - j) * X2 ** j
                                       for i in range(1, degree + 1)
                                       for j in range(i + 1)]))
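# Hypothetical usage sketch: two column vectors expand into the 27 polynomial
# terms of degree <= 6 plus the bias column, 28 features in total.
X1 = np.array([[0.5], [1.0]])
X2 = np.array([[2.0], [3.0]])
print(map_feature(X1, X2).shape)  # (2, 28)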
def rand_initialize_weights(L_in, L_out):
    # Break symmetry: uniform random weights in [-epsilon_init, epsilon_init].
    epsilon_init = 0.12
    return np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init


if __name__ == '__main__':
    data = sio.loadmat('ex4data1.mat')
    y = data['y']
    X = data['X']
    X = add_zero_feature(X)

    data = sio.loadmat('ex4weights.mat')
    Theta1 = data['Theta1']
    Theta2 = data['Theta2']
    nn_params = np.concatenate((Theta1.ravel(), Theta2.ravel()))

    input_layer_size = 400
    hidden_layer_size = 25
    num_labels = 10

    m = len(y)
    # Recode the labels (1..10 in the .mat file) as one-hot columns of Y.
    Y = (np.arange(num_labels)[:, np.newaxis] == (y.T - 1)).astype(float)

    # Check the cost at the pre-trained weights for lambda = 0 and 1. The loop
    # body was truncated in the source; this reconstruction assumes cf_nn
    # receives the parameters as a row matrix, as its indexing implies.
    for lambda_coef in (0, 1):
        J = cf_nn(np.matrix(nn_params), input_layer_size, hidden_layer_size,
                  num_labels, X, Y, lambda_coef)
        print('Cost at loaded parameters (lambda = {}): {}'.format(lambda_coef, J))
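    # A hedged training sketch under the same conventions; the optimizer,
    # method, and iteration count are assumptions mirroring the one-vs-all
    # script's use of scipy.optimize.minimize, not the author's exact setup.
    from scipy.optimize import minimize

    initial_nn_params = np.concatenate((
        rand_initialize_weights(input_layer_size, hidden_layer_size).ravel(),
        rand_initialize_weights(hidden_layer_size, num_labels).ravel()))
    res = minimize(lambda p, *args: cf_nn(np.matrix(p), *args), initial_nn_params,
                   jac=lambda p, *args: gf_nn(np.matrix(p), *args),
                   method='CG', options={'maxiter': 50},
                   args=(input_layer_size, hidden_layer_size, num_labels, X, Y, 1))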
import scipy.io as sio
import numpy as np

from common_functions import add_zero_feature, sigmoid

if __name__ == '__main__':
    data = sio.loadmat('ex3data1.mat')
    y = data['y']
    X = data['X']
    X = add_zero_feature(X)

    data = sio.loadmat('ex3weights.mat')
    Theta1 = data['Theta1']
    Theta2 = data['Theta2']

    # Feedforward through the pre-trained two-layer network.
    p = sigmoid(np.dot(Theta1, X.T))
    p = add_zero_feature(p, axis=0)
    p = sigmoid(np.dot(Theta2, p))
    # +1 because the .mat labels are 1-indexed (MATLAB convention).
    y_pred = np.argmax(p, axis=0) + 1
    print('Training Set Accuracy: {}'.format(np.mean(y_pred == y.flatten()) * 100))
Xval = data["Xval"] yval = data["yval"] Xtest = data["Xtest"] ytest = data["ytest"] m = len(X) def plot_data(): plt.xlabel("Change in water level (x)") plt.ylabel("Water flowing out of the dam (y)") plt.plot(X, y, "rx") plot_data() plt.show() X_extended = add_zero_feature(X) theta = np.array([1, 1]) print "J = {}, gradient = {}".format(cost_function(theta, X_extended, y, 1), grad_function(theta, X_extended, y, 1)) theta = train_linear_regression(X_extended, y, 1) plot_data() plt.plot(X, np.dot(X_extended, theta[:, np.newaxis]).ravel()) plt.show() lambda_coef = 0 error_train, error_val = learning_curve(X_extended, y, add_zero_feature(Xval), yval, lambda_coef) plt.plot(range(1, m + 1), error_train, label="Train") plt.plot(range(1, m + 1), error_val, c="r", label="Cross validation")