def compare_M(X, y, k=1):
    # k must be at least 1, otherwise there is nothing to select
    assert k > 0
    # Create Xremain, which we will slice columns out of
    Xremain = X
    # Create list M, in which we will store each model Mi
    M = []
    # Create list orig_indices, so we can keep track of how each model was
    # built
    orig_indices = []
    Xbest = None
    for i in range(k):
        # get the index of the best remaining column, and its cost
        ind, cost = compare(Xremain, y)
        # extract this best column
        Xnext = Xremain[:, ind].reshape(-1, 1)
        # add the extracted column to the best matrix so far
        if Xbest is None:
            Xbest = Xnext
        else:
            Xbest = np.c_[Xbest, Xnext]
        # Extend the best matrix, this is Mi
        Xe = amf.extended_matrix(Xbest)
        # Get betas for Mi using the normal equation
        betas = lirf.normal_equation(Xe, y)
        # Get the cost of Mi
        normal_eq_cost = lirf.cost_function(Xe, betas, y)
        # Get the original index of the extracted column (match by value)
        orig_index = np.where(Xnext == X)[1][0]
        # Add the index to the originals list
        orig_indices.append(orig_index)
        # Append the Mi model to the models list, attaching its betas and cost
        # as metadata for later use
        M.append({"model": Xbest, "betas": betas, "cost": normal_eq_cost})
        # Remove the extracted column from the Xremain matrix
        Xremain = np.c_[Xremain[:, 0:ind], Xremain[:, ind + 1:]]
    return (orig_indices, M)
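# compare_M above relies on a helper compare(X, y) that is not shown in this
# section; from the call site it is assumed to return the index of the single
# best column together with its cost. This is only a minimal sketch of such a
# helper, reusing the cost(X, y, j) helper defined later in this file; the
# actual implementation may differ.
def compare_sketch(X, y):
    # evaluate the normal-equation cost of each single-column model
    costs = [cost(X, y, j) for j in range(X.shape[1])]
    best_index = int(np.argmin(costs))
    return best_index, costs[best_index]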
def predict_gradient(X, zx, betas):
    # Step 1 - Append the prediction input to the rest of the data
    X_plus_z = np.append(X, zx, 0)
    # Step 2 - Normalize the combined data
    X_plus_z_normalized = amf.feature_normalization(X_plus_z)
    # Step 3 - Extract the normalized prediction row
    z_normalized = np.array([X_plus_z_normalized[-1]])
    # Step 4 - Predict y using the final betas from gradient descent
    zy = predict(amf.extended_matrix(z_normalized), betas[-1])
    return zy
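# predict_gradient calls a linear predict(Xe, betas) helper that is not shown
# in this section; it is assumed to be the plain dot product of the extended
# matrix and the beta vector. A minimal sketch under that assumption:
def predict_sketch(Xe, betas):
    # y_hat = Xe . beta, one prediction per row of Xe
    return Xe.dot(betas)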
def cost(X, y, j):
    # 1 - Extract column j
    Xreduced = X[:, j].reshape(-1, 1)
    # 2 - Extend the extracted column
    Xe = amf.extended_matrix(Xreduced)
    # 3 - Get betas using the normal equation
    betas = lirf.normal_equation(Xe, y)
    # 4 - Get the cost
    normal_eq_cost = lirf.cost_function(Xe, betas, y)
    return normal_eq_cost
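# The lirf.normal_equation and lirf.cost_function helpers used above are not
# shown in this section. These are minimal sketches of what they are assumed
# to compute, based on the formulas referenced in exercise A.1.6
# (beta = (Xe^T Xe)^(-1) Xe^T y and J(beta) = (1/n)(Xe beta - y)^T (Xe beta - y));
# the actual implementations may differ in detail.
def normal_equation_sketch(Xe, y):
    # closed-form least-squares solution
    return np.linalg.inv(Xe.T.dot(Xe)).dot(Xe.T).dot(y)


def cost_function_sketch(Xe, betas, y):
    # mean squared error of the linear model
    n = Xe.shape[0]
    residual = Xe.dot(betas) - y
    return residual.dot(residual) / n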
def exerciseA_1_gradient():
    print("\nExercise A.1 Gradient")

    # Load Data
    X, y = load_data()

    ### Gradient
    # Step 1 - Normalize and extend X
    Xe_n = amf.extended_matrix(amf.feature_normalization(X))

    # Step 2 - Calculate betas using gradient descent
    betas = lirf.gradient_descent(Xe_n, y, alpha=.001, n=1000)

    # Step 3 - Calculate the cost for each beta
    J_gradient = []
    for beta in betas:
        J_gradient.append(lirf.cost_function(Xe_n, beta, y))

    # Step 4 - Plot the cost over the iterations
    fig, ax1 = plt.subplots()
    fig.suptitle('Ex A.1 Gradient Descent, alpha = .001', fontsize=14)
    ax1.set(xlabel="Number of iterations = " + str(len(betas)),
            ylabel="Cost J, min = " + str(round(J_gradient[-1], 3)))
    ax1.plot(np.arange(0, len(betas)), J_gradient)
    plt.xlim(0, len(betas))
    plt.show()

    # Step 5 - Predict the height for an arbitrary pair of parent heights
    # 5a) Place the heights in a matrix
    heights_to_predict = np.array([[65, 70]])
    # 5b) Make the prediction
    y_parents_grad = lirf.predict_gradient(X, heights_to_predict, betas)
    print("==> The predicted height for a girl with parents (65, 70) is:\n",
          round(y_parents_grad[0], 2))
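# lirf.gradient_descent is assumed to return one beta vector per iteration
# (the code above evaluates the cost of every element of betas and uses
# betas[-1] for the final prediction). This is a minimal sketch of such a
# routine for linear regression under that assumption; the course
# implementation may differ, e.g. in how the gradient is scaled.
def gradient_descent_sketch(Xe, y, alpha, n):
    # start from the zero vector and take n gradient steps
    beta = np.zeros(Xe.shape[1])
    history = []
    for _ in range(n):
        gradient = Xe.T.dot(Xe.dot(beta) - y) / Xe.shape[0]
        beta = beta - alpha * gradient
        history.append(beta.copy())
    return history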
def exercise1_1():
    print("\nExercise 1 - Normal Equation")

    # Step 1 - Load data
    Csv_data = np.loadtxt("./A2_datasets_2020/GPUBenchmark.csv",
                          delimiter=',')  # load csv
    X = Csv_data[:, :-1]
    y = Csv_data[:, 6]

    # Step 2 - Normalize data
    Xn = amf.feature_normalization(X)

    # Step 3 - Plot data
    fig, ax = plt.subplots(2, 3)
    fig.suptitle('Ex 1.1, Multivariate Data Sets', fontsize=14)
    fig.tight_layout(pad=1.0, rect=[0, 0.03, 1, 0.95])
    titles = [
        "CudaCores", "BaseClock", "BoostClock", "MemorySpeed", "MemoryConfig",
        "MemoryBandwidth", "BenchmarkSpeed"
    ]
    # iterate over the columns of Xn by using the transpose of Xn
    i, j = 0, 0
    for ind, xi in enumerate(Xn.T):
        ax[i][j].scatter(xi, y)
        ax[i][j].set_title(titles[ind])
        #ax[i][j].set_xlim([xi.min()-1.5, xi.max()+1.5])
        j += 1
        if j == 3:
            i, j = 1, 0
    plt.show()

    # Step 4 - Get the extended matrix
    Xe = amf.extended_matrix(X)

    # Step 5 - Get betas using the normal equation
    betas = lirf.normal_equation(Xe, y)

    # Step 6 - Create the feature vector to predict
    pred = np.array([[2432, 1607, 1683, 8, 8, 256]])

    # Step 7 - Make the prediction
    y_pred = lirf.predict(amf.extended_matrix(pred), betas)[0]
    print("Predicted benchmark:", y_pred, " \tActual benchmark: 114")

    # Step 8 - What is the cost J(beta) when using the betas computed by
    # the normal equation above?
    normal_eq_cost = lirf.cost_function(Xe, betas, y)
    print("Cost:", normal_eq_cost)

    print("\nExercise 1 - Gradient Descent")
    # Gradient - Step 1 - Normalize and extend X
    Xe_n = amf.extended_matrix(amf.feature_normalization(X))

    # Step 2 - Calculate betas using gradient descent
    alpha, n = .01, 1000
    betas = lirf.gradient_descent(Xe_n, y, alpha, n)

    # Step 3 - Calculate the cost for each beta
    J_gradient = []
    for beta in betas:
        J_gradient.append(lirf.cost_function(Xe_n, beta, y))
    grad_cost = J_gradient[-1]
    print("alpha =", str(alpha), " n =", str(n))
    print("Cost:", str(grad_cost))
    print(
        "Gradient cost within",
        str(round(100 * abs(grad_cost - normal_eq_cost) / normal_eq_cost, 5))
        + "% of normal cost -> This is less than 1%!")

    # Step 4 - Predict the benchmark with the gradient descent betas
    y_pred_grad = lirf.predict_gradient(
        X, np.array([[2432, 1607, 1683, 8, 8, 256]]), betas)
    print("Predicted benchmark:", y_pred_grad[0])
def predict(to_predict, X, b):
    # Append the prediction input to the training data, normalize everything
    # together, then classify the normalized prediction row
    all_data = np.append(X, to_predict, 0)
    all_data_normalized = amf.feature_normalization(all_data)
    to_pred_normalized = np.array([all_data_normalized[-1]])
    return sigmoid(amf.extended_matrix(to_pred_normalized), b)[-1]
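# The sigmoid helper called above (lorf.sigmoid elsewhere) is assumed to apply
# the logistic function to the linear combination of the extended matrix and
# the betas. A minimal sketch under that assumption:
def sigmoid_sketch(Xe, b):
    # g(Xe b) = 1 / (1 + e^(-Xe b)), one probability per row of Xe
    return 1.0 / (1.0 + np.exp(-Xe.dot(b)))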
def exerciseA_1():
    print("\nExercise A.1")

    # Load Data
    global X, y
    X, y = load_data()

    # A1.1 - Plot the data
    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True)
    fig.suptitle('Ex A.1, Girl Height in inches', fontsize=14)
    ax1.set(xlabel="Mom Height", ylabel="Girl Height")
    ax2.set(xlabel="Dad Height")
    ax1.scatter(X[:, 0], y, c='#e82d8f', marker='1')
    ax2.scatter(X[:, 1], y, c='#40925a', marker='2')
    plt.show()

    # A1.2 - Compute the extended matrix
    Xe_parents = amf.extended_matrix(X)
    print("Extended Matrix of Parents' Heights\n", Xe_parents, "\n")

    # A1.3 - Compute the normal equation and make a prediction
    Beta_normal_parents = lirf.normal_equation(Xe_parents, y)
    y_parents_normal_eq = lirf.predict(
        amf.extended_matrix(np.array([[65, 70]])), Beta_normal_parents)
    print("==> Prediction of girl height with parental heights of 65, 70\n",
          y_parents_normal_eq[0], "\n")

    # A1.4 - Apply feature normalization and plot the dataset; the heights
    # should be centered around 0 with a standard deviation of 1.
    X_feature_normalized_heights = amf.feature_normalization(X)
    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True)
    fig.suptitle('Ex A.1, Girl Height in inches', fontsize=14)
    ax1.set(xlabel="Mom Height Normalized", ylabel="Girl Height")
    ax2.set(xlabel="Dad Height Normalized")
    ax1.scatter(X_feature_normalized_heights[:, 0], y, c='#e82d8f', marker='1')
    ax2.scatter(X_feature_normalized_heights[:, 1], y, c='#40925a', marker='2')
    plt.show()

    # A1.5 - Compute the extended matrix Xe and apply the normal equation
    # on the normalized version of (65, 70). The prediction should
    # still be 65.42 inches.
    Xe_feature_normalized_heights = amf.extended_matrix(
        X_feature_normalized_heights)
    Beta_normal_parents_normalized = lirf.normal_equation(
        Xe_feature_normalized_heights, y)
    heights_to_predict = np.array([[65, 70]])
    Heights_plus_pred = np.append(X, heights_to_predict, 0)
    Normalized_heights_plus_pred = amf.feature_normalization(Heights_plus_pred)
    Normalized_heights_to_pred = np.array([Normalized_heights_plus_pred[-1]])
    y_parents_pred = lirf.predict(
        amf.extended_matrix(Normalized_heights_to_pred),
        Beta_normal_parents_normalized)
    print(
        "==> Prediction of girl height with normalized parental heights of 65, 70\n",
        y_parents_pred[0], "\n")

    # A1.6 - Implement the cost function
    # J(beta) = (1/n) * (Xe beta - y)^T (Xe beta - y)
    # as a function of the parameters Xe, y, beta. The cost for the beta from
    # the normal equation should be 4.068.
    cost_function_normalized = lirf.cost_function(
        Xe_feature_normalized_heights, Beta_normal_parents_normalized, y)
    print("==> Cost Function (normalized)\n", cost_function_normalized, "\n")
    cost_function = lirf.cost_function(Xe_parents, Beta_normal_parents, y)
    print("==> Cost Function (not normalized)\n", cost_function, "\n")
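# The amf.feature_normalization and amf.extended_matrix helpers used throughout
# these exercises are not shown in this section. These are minimal sketches of
# what they are assumed to do (A1.4 states that normalized features should be
# centered around 0 with a standard deviation of 1, and the extended matrix is
# assumed to prepend a column of ones for the intercept); the real
# implementations may differ.
def feature_normalization_sketch(X):
    # z-score each column: (x - mean) / std
    return (X - X.mean(axis=0)) / X.std(axis=0)


def extended_matrix_sketch(X):
    # prepend a column of ones so beta_0 acts as the intercept
    return np.c_[np.ones((X.shape[0], 1)), X]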
def exerciseB_1():
    print("\nExercise B - Logistic Regression")

    # Ex B.1
    # Normalize the data
    Xn = amf.feature_normalization(X)

    # Plot the data
    fig, ax1 = plt.subplots(1, 1)
    fig.suptitle('Ex B.1, Logistic Regression (normalized data)', fontsize=14)
    fig.tight_layout(pad=1.0, rect=[0, 0.03, 1, 0.95])
    ax1.scatter(
        Xn[y > 0, 0],  # plot first col on x
        Xn[y > 0, 1],  # plot second col on y
        c='1',  # white markers for the admitted class
        label="Admitted",
        s=30,
        edgecolors='r'  # optional, add a border to the dots
    )
    ax1.scatter(
        Xn[y == 0, 0],  # plot first col on x
        Xn[y == 0, 1],  # plot second col on y
        c='0',  # black markers for the not-admitted class
        label="Not Admitted",
        s=30,
        edgecolors='r'  # optional, add a border to the dots
    )
    ax1.legend(loc='upper right')
    plt.show()

    # Ex B.2
    print("\nB.2, sigmoid", lorf.sigmoid_matrix(np.array([[0, 1], [2, 3]])))

    # Ex B.3
    print("\nB.3, Xe", amf.extended_matrix(X))

    # Ex B.4
    beta = np.zeros(Xe_n.shape[1])
    lcf = lorf.cost_function(Xe_n, beta, y)
    print("\nB.4, logistic cost, beta=[0,0,0]::", lcf,
          "\n(solution [0,0,0] / .6931)")

    # Ex B.5
    global lgd
    lgd = lorf.gradient_descent(
        amf.extended_matrix(amf.feature_normalization(X)), y, .005, 1)
    print("\nB.5, gradient_descent alpha=.005 n=1::beta=", lgd, "cost=",
          lorf.cost_function(Xe_n, lgd, y),
          "\n(solution B1=[.05,0.141,0.125] / J=.6217)")

    # Ex B.6
    lgd = lorf.gradient_descent(
        amf.extended_matrix(amf.feature_normalization(X)), y, .005, 1000)
    print("\nB.6, gradient_descent alpha=.005 n=1000::beta=", lgd, "cost=",
          lorf.cost_function(Xe_n, lgd, y),
          "\n(solution Bn=[1.686,3.923,3.657] / J=.2035)")

    # Plot the logistic decision boundary over a mesh grid
    Xn = amf.feature_normalization(X)
    X1 = Xn[:, 0]
    X2 = Xn[:, 1]
    #Xe_n2 = amf.mapFeature(Xn[:,0], Xn[:,1], 2)
    h = .01  # step size in the mesh
    x_min, x_max = X1.min() - 0.1, X1.max() + 0.1
    y_min, y_max = X2.min() - 0.1, X2.max() + 0.1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))  # mesh grid
    x1, x2 = xx.ravel(), yy.ravel()  # turn into two Nx1 arrays
    XXe = amf.mapFeature(x1, x2, 2)  # extend matrix for degree 2
    lgd2 = lorf.gradient_descent(amf.mapFeature(X1, X2, 2), y, .005, 1000)
    p = lorf.sigmoid(XXe, lgd2)  # classify mesh ==> probabilities
    classes = p > 0.5  # round off probabilities
    clz_mesh = classes.reshape(xx.shape)  # return to mesh format
    cmap_light = ListedColormap(["#FFAAAA", "#AAFFAA", "#AAAAFF"])  # mesh plot
    cmap_bold = ListedColormap(["#FF0000", "#00FF00", "#0000FF"])  # colors
    plt.figure(2)
    plt.pcolormesh(xx, yy, clz_mesh, cmap=cmap_light)
    plt.scatter(X1, X2, c=y, marker=".", cmap=cmap_bold)
    plt.show()

    # Ex B.7
    prediction = lorf.predict(np.array([[45, 85]]), X, lgd)
    # Compute training errors
    errors = lorf.get_errors(Xe_n, lgd, y)
    print("\nB.7, predict [45 85]::", prediction, "training errors::", errors,
          "\n(solution predict=.77, errors=11)")
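# lorf.cost_function above is assumed to be the logistic (cross-entropy) cost:
# with beta = [0, 0, 0] it should give about .6931 = -log(0.5), which matches
# the solution quoted in B.4. A minimal sketch under that assumption; the
# course implementation may differ in detail.
def logistic_cost_sketch(Xe, beta, y):
    # J(beta) = -(1/n) * sum( y*log(g) + (1-y)*log(1-g) ), g = sigmoid(Xe beta)
    n = Xe.shape[0]
    g = 1.0 / (1.0 + np.exp(-Xe.dot(beta)))
    return -(y.dot(np.log(g)) + (1 - y).dot(np.log(1 - g))) / n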
@author: Derek Yadgaroff, [email protected]
"""
import numpy as np
import matplotlib.pyplot as plt
import assignment2_logistic_regression_functions as lorf
import assignment2_matrix_functions as amf
from matplotlib.colors import ListedColormap

# Load Data
Csv_data = np.loadtxt("./A2_datasets_2020/admission.csv",
                      delimiter=',')  # load csv
X = Csv_data[:, 0:2]
y = Csv_data[:, -1]
Xe = amf.extended_matrix(X)
Xe_n = amf.extended_matrix(amf.feature_normalization(X))
def exercise3_1():
    print("\nExercise 3")

    # Ex 3.1 Part A
    # Import the data
    data = np.loadtxt("./A2_datasets_2020/breast_cancer.csv",
                      delimiter=',')  # load csv
    X = data[:, 0:9]
    y = data[:, -1]  # benign = 2, malignant = 4

    # Ex 3.1 Part B
    # Split the data
    # @NOTE - Using this method was approved on Slack by TA
    test_size = 0.2
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=test_size)

    # Ex 3.2
    # Modify the labels: benign (2) -> 0, malignant (4) -> 1
    for i in range(len(y_train)):
        y_train[i] = 0 if y_train[i] == 2 else 1
    for i in range(len(y_test)):
        y_test[i] = 0 if y_test[i] == 2 else 1

    # Ex 3.3
    X_train_extended_normalized = amf.extended_matrix(
        amf.feature_normalization(X_train))
    alpha, n = .0001, 10000
    betas = lorf.gradient_descent(X_train_extended_normalized,
                                  y_train,
                                  alpha,
                                  n,
                                  get_all_betas=True)
    costs = []
    for i, beta in enumerate(betas):
        costs.append(
            [i, lorf.cost_function(X_train_extended_normalized, beta, y_train)])

    # Plot the cost per iteration
    fig, ax1 = plt.subplots(1, 1)
    fig.suptitle('Ex 3.3, Linear Logistic Regression Cost - α = ' + str(alpha)
                 + ' n = ' + str(n),
                 fontsize=14)
    fig.tight_layout(pad=1.0, rect=[0, 0.03, 1, 0.95])
    c0 = np.array(costs)[:, 0]
    c1 = np.array(costs)[:, 1]
    ax1.plot(c0, c1)
    ax1.set_xlabel("N Iterations")
    ax1.set_ylabel("Cost")
    plt.show()

    # Ex 3.4
    # Compute training errors and accuracy using the final beta
    beta = betas[-1]
    errors = lorf.get_errors(X_train_extended_normalized, beta, y_train)
    correct = len(X_train_extended_normalized) - errors
    accuracy = correct / len(X_train_extended_normalized)
    print("\nEx 3.4")
    print("Train:: Errors =", errors, "Correct =", correct, "Accuracy =",
          accuracy)

    # Ex 3.5
    # Compute test errors and accuracy
    X_test_extended_normalized = amf.extended_matrix(
        amf.feature_normalization(X_test))
    test_errors = lorf.get_errors(X_test_extended_normalized, beta, y_test)
    test_correct = len(X_test_extended_normalized) - test_errors
    test_accuracy = test_correct / len(X_test_extended_normalized)
    print("\nEx 3.5")
    print("Test:: Errors =", test_errors, "Correct =", test_correct,
          "Accuracy =", test_accuracy)
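# Notes on helpers assumed above: train_test_split is assumed to be imported
# from sklearn.model_selection in this file's header, and lorf.get_errors is
# assumed to count the points whose predicted class (probability thresholded
# at 0.5) differs from the label. A minimal sketch of such an error count; the
# real implementation may differ.
def get_errors_sketch(Xe, beta, y):
    # classify each row and count the misclassifications
    p = 1.0 / (1.0 + np.exp(-Xe.dot(beta)))
    predictions = (p >= 0.5).astype(float)
    return int(np.sum(predictions != y))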