def course_logistic(xtrain, ytrain, xtest, ytest):
    # train logistic regression, then report cross entropy and
    # classification rate on both the train and test sets
    w = logistic_regression(xtrain, ytrain, 1000, 0.01)
    print("w shape:", w.shape)
    print("w avg size:", w.mean())
    y_tag_test = sigmoid(xtest.dot(w))
    y_tag_train = sigmoid(xtrain.dot(w))
    print("cross entropy train:", cross_entropy(ytrain, y_tag_train))
    print("cross entropy test:", cross_entropy(ytest, y_tag_test))
    print("classification rate train:", classification_rate(y_tag_train, ytrain))
    score = classification_rate(y_tag_test, ytest)
    print("classification rate test:", score)
    return score
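The helpers this function calls are not defined in this section. Below is a minimal sketch of what they plausibly look like, with signatures inferred from the call sites above rather than taken from the course source:

import numpy as np

# hypothetical stand-ins for the course helpers used above
def sigmoid(a):
    return 1 / (1 + np.exp(-a))

def cross_entropy(T, Y):
    # mean binary cross entropy between targets T and predictions Y
    return -np.mean(T * np.log(Y) + (1 - T) * np.log(1 - Y))

def classification_rate(Y_pred, T):
    # fraction of predictions matching the targets after thresholding at 0.5
    return np.mean(np.round(Y_pred) == T)

def logistic_regression(X, T, iterations, learning_rate):
    # plain full-batch gradient descent on the cross-entropy loss
    w = np.random.randn(X.shape[1]) / np.sqrt(X.shape[1])
    for _ in range(iterations):
        Y = sigmoid(X.dot(w))
        w += learning_rate * X.T.dot(T - Y)
    return w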
import matplotlib.pyplot as plt
import numpy as np

# get the data
Xtrain, Ytrain, Xtest, Ytest = get_e_commerce_binary_data()

# experiment: replace the test targets with random labels (or flip them with
# the commented line) to see how the test cost reacts to meaningless targets
Ytest = np.round(np.random.random(Ytest.shape[0]))
# Ytest = 1 - Ytest

# make predictions
def sigmoid(a):
    return 1 / (1 + np.exp(-a))

w, train_costs, test_costs = logistic_regression_with_test(
    Xtrain, Ytrain, Xtest, Ytest, 10000, 0.0001)
pYtrain = sigmoid(Xtrain.dot(w))
pYtest = sigmoid(Xtest.dot(w))
print("Final train classification_rate:", classification_rate(pYtrain, Ytrain))
print("Final test classification_rate:", classification_rate(pYtest, Ytest))

legend1, = plt.plot(train_costs, label='train cost')
legend2, = plt.plot(test_costs, label='test cost')
plt.legend([legend1, legend2])
plt.show()
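logistic_regression_with_test is also not shown in this section. A self-contained sketch consistent with how it is called above (returning the weights plus per-iteration train and test costs), assuming full-batch gradient descent:

import numpy as np

def logistic_regression_with_test(Xtrain, Ytrain, Xtest, Ytest,
                                  iterations, learning_rate):
    # hypothetical reconstruction: gradient descent on the train set while
    # recording cross entropy on both sets at every iteration
    def xent(T, Y):
        return -np.mean(T * np.log(Y) + (1 - T) * np.log(1 - Y))
    w = np.random.randn(Xtrain.shape[1]) / np.sqrt(Xtrain.shape[1])
    train_costs, test_costs = [], []
    for _ in range(iterations):
        pYtrain = 1 / (1 + np.exp(-Xtrain.dot(w)))
        pYtest = 1 / (1 + np.exp(-Xtest.dot(w)))
        train_costs.append(xent(Ytrain, pYtrain))
        test_costs.append(xent(Ytest, pYtest))
        w += learning_rate * Xtrain.T.dot(Ytrain - pYtrain)
    return w, train_costs, test_costs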
import numpy as np
from lazy_prog.common.common import sigmoid, cross_entropy, create_data_2_gaussian_clouds

# L1 regularization is also called Lasso regression, and L2 is also called ridge regression
# ==========================================================================================
N = 100
D = 2
Xb, T = create_data_2_gaussian_clouds(N, D)

# randomly initialize the weights; give both models the same starting point
w = np.random.randn(D + 1)
w_l2 = w.copy()
w_normal = w.copy()

# calculate the initial model outputs
z = Xb.dot(w)
Y = sigmoid(z)
Y_l2 = Y.copy()
Y_normal = Y.copy()

# let's do gradient descent 100 times
learning_rate = 0.1
smoothing_parameter = 0.1  # the L2 penalty strength (lambda)
for i in range(100):
    if i % 10 == 0:
        print("entropy at step", i, "normal:", cross_entropy(T, Y_normal))
        print("entropy at step", i, "l2:", cross_entropy(T, Y_l2))
    # unregularized update
    w_normal = w_normal + learning_rate * Xb.T.dot(T - Y_normal)
    Y_normal = sigmoid(Xb.dot(w_normal))
    # L2-regularized update: the penalty (lambda/2)*||w||^2 adds -lambda*w to the gradient step
    w_l2 = w_l2 + learning_rate * (Xb.T.dot(T - Y_l2) - smoothing_parameter * w_l2)
    Y_l2 = sigmoid(Xb.dot(w_l2))
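The point of the L2 penalty is weight shrinkage, which the entropy printout alone does not show. A quick comparison to run after the loop above, reusing its w_normal and w_l2:

# L2 regularization pulls the weights toward zero, so the regularized
# solution should end up with a smaller average magnitude
print("mean |w| without regularization:", np.abs(w_normal).mean())
print("mean |w| with L2:               ", np.abs(w_l2).mean())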
import matplotlib.pyplot as plt
import numpy as np
from lazy_prog.common.common import sigmoid, cross_entropy, create_data_2_gaussian_clouds

N = 100
D = 2
Xb, T = create_data_2_gaussian_clouds(N, D)

w = np.random.randn(D + 1)
z = Xb.dot(w)
Y = sigmoid(z)
learning_rate = 0.1

# column 0 of Xb is the bias column, so the two features are columns 1 and 2
plt.scatter(Xb[:, 1], Xb[:, 2], c=T, s=100, alpha=0.5)
for i in range(100000):
    if i % 10000 == 0:
        print("entropy for step", i, ":", cross_entropy(T, Y))
        print("w at step", i, ":", w)
    if i in (0, 1, 10):
        # decision boundary: w0 + w1*x + w2*y = 0  =>  y = -(w0 + w1*x) / w2
        x_axis = np.linspace(-6, 6, 100)
        y_axis = -(w[0] + x_axis * w[1]) / w[2]
        plt.plot(x_axis, y_axis, label="iter = " + str(i))
    # gradient descent weight update
    w += learning_rate * Xb.T.dot(T - Y)
    # recalculate Y
    Y = sigmoid(Xb.dot(w))

plt.legend()
plt.show()
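Why that y_axis formula: the model outputs exactly 0.5 where w0 + w1*x1 + w2*x2 = 0, and solving for x2 gives the plotted line. A quick numerical check of that claim, reusing w and sigmoid from the script above:

# points on the plotted line should give sigmoid(z) = 0.5
x1 = np.linspace(-6, 6, 5)
x2 = -(w[0] + x1 * w[1]) / w[2]
z = w[0] + w[1] * x1 + w[2] * x2
print(sigmoid(z))  # ~[0.5 0.5 0.5 0.5 0.5]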
import numpy as np
from lazy_prog.common.common import sigmoid

# ElasticNet is the name for adding both L1 and L2 regularization
N = 50
D = 50

# uniformly distributed numbers between -5, +5
X = (np.random.random((N, D)) - 0.5) * 10

# true weights - only the first 3 dimensions of X affect Y;
# we want the model to learn that only the first 3 mean anything
true_w = np.array([1, 0.5, -0.5] + [0] * (D - 3))

# generate Y - add Gaussian noise with standard deviation 0.5
Y = np.round(sigmoid(X.dot(true_w) + np.random.randn(N) * 0.5))

# perform gradient descent to find w
costs = []  # keep track of the cost at each step
w = np.random.randn(D) / np.sqrt(D)  # randomly initialize w
learning_rate = 0.001
# l1 = 3.0  # try different values - what effect does it have on w?
l1 = 10.0
l2 = 0.01

for t in range(5000):
    # update w: the L1 penalty contributes l1*sign(w) to the gradient,
    # the L2 penalty contributes l2*w
    Yhat = sigmoid(X.dot(w))
    w = w - learning_rate * (X.T.dot(Yhat - Y) + l1 * np.sign(w) + l2 * w)

    # find and store the cost (cross entropy plus both penalty terms)
    cost = -(Y * np.log(Yhat) + (1 - Y) * np.log(1 - Yhat)).mean() \
        + l1 * np.abs(w).mean() + l2 * w.dot(w)
    costs.append(cost)
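The costs list invites a look at convergence, and comparing the learned weights against true_w shows the sparsity the L1 penalty buys. A short follow-up, assuming the variables from the loop above:

import matplotlib.pyplot as plt

# cost should decrease and then flatten out
plt.plot(costs)
plt.title("cost per iteration")
plt.show()

# with a strong L1 penalty, only the first 3 weights should stay far from zero
plt.plot(true_w, label='true w')
plt.plot(w, label='w learned')
plt.legend()
plt.show()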