import numpy as np


def logistic_graddesc(X, Y, T=1000, learning_rate=10e-2):
    # Gradient descent for logistic regression, using the analytic gradient of the cross-entropy.
    # It works even when a subset of columns of X are multiples of each other,
    # since no matrix inversion is required.
    # https://deeplearningcourses.com/c/data-science-linear-regression-in-python
    # https://www.udemy.com/data-science-linear-regression-in-python
    w = np.random.randn(X.shape[1])
    Yh = sigmoid(X.dot(w))
    xe = []
    for _ in range(T):
        delta = Y - Yh
        # gradient descent weight update (equivalently, gradient ascent on the log-likelihood)
        w += learning_rate * X.T.dot(delta)
        # recalculate the model output
        Yh = sigmoid(X.dot(w))
        xe.append(xentropy(Y, Yh))
    return w, xe
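
# The helpers below (sigmoid, xentropy, fwd, classification_rate) are used by the
# functions and scripts in this file but are not defined in this section; what
# follows is a minimal sketch of how they are typically implemented, not the
# original definitions.
def sigmoid(z):
    # squash real-valued activations into (0, 1)
    return 1 / (1 + np.exp(-z))


def xentropy(T, Y):
    # binary cross-entropy between targets T and predicted probabilities Y
    return -np.mean(T * np.log(Y) + (1 - T) * np.log(1 - Y))


def fwd(X, w, b):
    # forward pass for logistic regression with an explicit bias term
    return sigmoid(X.dot(w) + b)


def classification_rate(Y, P):
    # fraction of predictions P that match the targets Y
    return np.mean(Y == P)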
def logisticl1(X, Y, l1, T=1000, learning_rate=10e-2):
    # Gradient descent for logistic regression with L1 regularization, which
    # shrinks the weights and pushes many of them toward zero (sparsity).
    # It works even when a subset of columns of X are multiples of each other,
    # since no matrix inversion is required.
    # https://www.udemy.com/data-science-logistic-regression-in-python/learn/v4/t/lecture/6183984?start=0
    # https://deeplearningcourses.com/c/data-science-linear-regression-in-python
    # https://www.udemy.com/data-science-logistic-regression-in-python/learn/v4/t/lecture/3963018?start=0
    w = np.random.randn(X.shape[1])
    xe = []
    for _ in range(T):
        Yh = sigmoid(X.dot(w))
        delta = Yh - Y
        # gradient descent weight update, including the L1 penalty gradient l1 * sign(w)
        w -= learning_rate * (X.T.dot(delta) + l1 * np.sign(w))
        xe.append(xentropy(Y, Yh) + l1 * np.abs(w).mean())
    return w, xe
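
# The script below calls logisticl2, which is not defined in this section. The
# sketch here is an assumed L2-regularized counterpart of logisticl1 (penalty
# gradient l2 * w instead of the L1 sign term); it is not the original code.
def logisticl2(X, Y, l2, T=1000, learning_rate=10e-2):
    w = np.random.randn(X.shape[1])
    xe = []
    for _ in range(T):
        Yh = sigmoid(X.dot(w))
        delta = Yh - Y
        # gradient descent weight update with the L2 penalty gradient l2 * w
        w -= learning_rate * (X.T.dot(delta) + l2 * w)
        xe.append(xentropy(Y, Yh) + l2 * w.dot(w))
    return w, xe


# Assumed setup for the demo below: n = 100 two-dimensional points, with the
# first half shifted by -2 so that the +2 shift applied next makes the two
# classes linearly separable. The -2 shift is a guess at the omitted
# data-generation code.
n, d = 100, 2
X = np.random.randn(n, d)
X[:50, :] = X[:50, :] - 2 * np.ones((50, d))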
# shift the second class and label the two halves
X[50:, :] = X[50:, :] + 2 * np.ones((50, d))
T = np.array([0] * 50 + [1] * 50)

# add a bias column of ones
ones = np.ones((n, 1))
Xb = np.concatenate((ones, X), axis=1)

# randomly initialize the weights
w = np.random.randn(d + 1)

# calculate the model output for the random weights
z = Xb.dot(w)
Y = sigmoid(z)
print(xentropy(T, Y))

# train on the targets T, without and with L2 regularization
w, xe = logistic_graddesc(Xb, T, 1000, 0.01)
wl2, xel2 = logisticl2(Xb, T, 10, T=1000, learning_rate=10e-2)
print("Final w:", w, xe[-1])
print("Final wl2:", wl2, xel2[-1])

# the separating line w0 + w1*x + w2*y = 0, solved for y
x_axis = np.linspace(-6, 6, 100)
y_axis = -(w[0] + x_axis * w[1]) / w[2]
y_axisl2 = -(wl2[0] + x_axis * wl2[1]) / wl2[2]
# plt.plot(x_axis, y_axis)
# plt.show()
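
# A sketch of the plot that the commented-out lines above gesture at: scatter
# the two classes and draw both separating lines (plain gradient descent and
# L2-regularized). The matplotlib usage is illustrative, not the original code.
import matplotlib.pyplot as plt

plt.scatter(X[:, 0], X[:, 1], c=T, s=30, alpha=0.5)
plt.plot(x_axis, y_axis, label="gradient descent")
plt.plot(x_axis, y_axisl2, label="L2-regularized")
plt.legend()
plt.show()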
# NOTE: this block is a separate train/test demo; it assumes a larger feature
# matrix X with binary targets Y (loaded and shuffled earlier), independent of
# the 2-D example above.

# train/test split: hold out the last 100 rows for testing
Xtrain = X[:-100]
Ytrain = Y[:-100]
Xtest = X[-100:]
Ytest = Y[-100:]

# randomly initialize the weights and the bias
d = X.shape[1]
w = np.random.randn(d)
b = 0

train_costs = []
test_costs = []
learning_rate = 0.001
for i in range(10000):
    pYtrain = fwd(Xtrain, w, b)
    pYtest = fwd(Xtest, w, b)

    # track the cross-entropy cost on both sets
    ctrain = xentropy(Ytrain, pYtrain)
    ctest = xentropy(Ytest, pYtest)
    train_costs.append(ctrain)
    test_costs.append(ctest)

    # gradient descent on the weights and the bias
    w -= learning_rate * Xtrain.T.dot(pYtrain - Ytrain)
    b -= learning_rate * (pYtrain - Ytrain).sum()

    if i % 1000 == 0:
        print(i, ctrain, ctest)

print("Final train classification_rate:", classification_rate(Ytrain, np.round(pYtrain)))
print("Final test classification_rate:", classification_rate(Ytest, np.round(pYtest)))
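
# train_costs and test_costs are collected above but never used in this
# section; a minimal sketch of the usual learning-curve plot follows
# (matplotlib imported as plt above).
plt.plot(train_costs, label="train cost")
plt.plot(test_costs, label="test cost")
plt.legend()
plt.show()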