def heavy_ball_descent(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Run stochastic heavy-ball (momentum) descent and record accuracies.

    Each iteration draws one training index uniformly at random, builds a
    gradient estimate from ``gradient_comp`` plus the ``2 * lamb * w`` term,
    and applies ``w <- w - alpha * g + beta * (w - w_prev)``.

    Args:
        x_train: Training inputs; ``shape[0]`` is used as the number of
            samples and ``shape[1]`` as the length of the weight vector.
        y_train: Training targets, forwarded to the helper functions.
        x_test: Test inputs, forwarded to ``compute_acc``.
        y_test: Test targets, forwarded to ``compute_acc``.
        alpha: Accepted for signature parity with the other optimizers but
            NOT used: the step size is recomputed below from the constants
            returned by ``get_lipschitz_mu``.
        T: Number of iterations.
        lamb: Regularization weight.

    Returns:
        Tuple ``(w, train_accuracies, test_accuracies)`` where the two lists
        hold one entry per iteration, as returned by ``compute_acc``.
    """
    train_accuracies = []
    test_accuracies = []
    n = x_train.shape[0]
    p = x_train.shape[1]
    w = np.ones(p)
    w_prev = w

    # Bug fix: the original passed the undefined name ``lamda`` here; the
    # regularization weight of this function is the ``lamb`` parameter.
    # NOTE(review): L and mu are presumably the smoothness and
    # strong-convexity constants of the objective -- confirm against
    # get_lipschitz_mu.
    L, mu = get_lipschitz_mu(x_train, y_train, lamb, w)
    sqrt_L = math.sqrt(L)
    sqrt_mu = math.sqrt(mu)
    denom = (sqrt_L + sqrt_mu) ** 2
    # Step size and momentum derived from L and mu, both damped by 0.8.
    alpha = 0.8 * 4.0 / denom
    beta = 0.8 * (sqrt_L - sqrt_mu) ** 2 / denom

    for _ in tqdm(range(T)):
        i = random.randint(0, n - 1)
        gradient = gradient_comp(x_train, y_train, lamb, i, w) + 2 * lamb * w
        w_next = w - alpha * gradient + beta * (w - w_prev)
        # The previous iterate is always the point we just stepped from. The
        # original guarded this with ``if t >= 1``, which was a no-op because
        # w_prev already equals the initial w during the first iteration.
        w_prev, w = w, w_next

        train_acc, test_acc = compute_acc(x_train, y_train, x_test, y_test, w)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

    return w, train_accuracies, test_accuracies
def FISTA_descent(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Run a stochastic FISTA-style accelerated descent and record accuracies.

    Maintains the iterate ``w`` and an extrapolated point ``v``. Each
    iteration draws one training index uniformly at random, takes a gradient
    step from ``v`` and then extrapolates using the FISTA momentum sequence
    ``t_{k+1} = (1 + sqrt(1 + 4 t_k^2)) / 2``.

    Args:
        x_train: Training inputs; ``shape[0]`` is used as the number of
            samples and ``shape[1]`` as the length of the weight vector.
        y_train: Training targets, forwarded to the helper functions.
        x_test: Test inputs, forwarded to ``compute_acc``.
        y_test: Test targets, forwarded to ``compute_acc``.
        alpha: Step size.
        T: Number of iterations.
        lamb: Regularization weight.

    Returns:
        Tuple ``(w, train_accuracies, test_accuracies)`` where the two lists
        hold one entry per iteration, as returned by ``compute_acc``.
    """
    train_accuracies = []
    test_accuracies = []
    n = x_train.shape[0]
    p = x_train.shape[1]
    w = np.ones(p)
    v = w
    # Bug fix: the momentum sequence starts at t_1 = 1. The original started
    # at 0, which made the first extrapolation coefficient (0 - 1) / 1 = -1
    # and therefore reset v to the initial point, discarding the first step.
    eta = 1.0

    for _ in tqdm(range(T)):
        i = random.randint(0, n - 1)
        # Bug fix: the gradient is evaluated at the extrapolated point v (the
        # point the step is taken from), not at the previous iterate w.
        gradient = gradient_comp(x_train, y_train, lamb, i, v) + 2.0 * lamb * v
        new_eta = (1.0 + math.sqrt(1.0 + 4.0 * eta ** 2)) / 2.0

        w_prev = w
        w = v - alpha * gradient
        v = w + (eta - 1.0) / new_eta * (w - w_prev)
        eta = new_eta

        train_acc, test_acc = compute_acc(x_train, y_train, x_test, y_test, w)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

    return w, train_accuracies, test_accuracies
def gradient_descent(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Run full-batch gradient descent and record accuracies.

    Each iteration averages ``gradient_comp`` over every training index,
    adds the ``2 * lamb * w`` term and takes a step of size ``alpha``.

    Args:
        x_train: Training inputs; ``shape[0]`` is used as the number of
            samples and ``shape[1]`` as the length of the weight vector.
        y_train: Training targets, forwarded to the helper functions.
        x_test: Test inputs, forwarded to ``compute_acc``.
        y_test: Test targets, forwarded to ``compute_acc``.
        alpha: Step size.
        T: Number of iterations.
        lamb: Regularization weight.

    Returns:
        Tuple ``(w, train_accuracies, test_accuracies)`` where the two lists
        hold one entry per iteration, as returned by ``compute_acc``.
    """
    train_accuracies = []
    test_accuracies = []
    n = x_train.shape[0]
    p = x_train.shape[1]
    w = np.ones(p)

    for _ in tqdm(range(T)):
        gradient = 0
        for i in range(n):
            gradient += gradient_comp(x_train, y_train, lamb, i, w)
        gradient = (1.0 / n) * gradient + 2 * lamb * w
        w = w - alpha * gradient

        train_acc, test_acc = compute_acc(x_train, y_train, x_test, y_test, w)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

    return w, train_accuracies, test_accuracies


# The name ``gradient_descent`` is defined a second time right after this
# function (the single-sample stochastic variant), which rebinds the name and
# would leave the full-batch implementation above unreachable. This alias
# keeps it callable without changing what ``gradient_descent`` resolves to.
batch_gradient_descent = gradient_descent
def gradient_descent(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Run single-sample stochastic gradient descent and record accuracies.

    Despite its name this is the stochastic variant: every iteration draws
    one training index uniformly at random and steps along
    ``gradient_comp(...) + 2 * lamb * w`` with step size ``alpha``.

    Args:
        x_train: Training inputs; ``shape[0]`` is used as the number of
            samples and ``shape[1]`` as the length of the weight vector.
        y_train: Training targets, forwarded to the helper functions.
        x_test: Test inputs, forwarded to ``compute_acc``.
        y_test: Test targets, forwarded to ``compute_acc``.
        alpha: Step size.
        T: Number of iterations.
        lamb: Regularization weight.

    Returns:
        Tuple ``(w, train_accuracies, test_accuracies)`` where the two lists
        hold one entry per iteration, as returned by ``compute_acc``.
    """
    num_samples = x_train.shape[0]
    num_features = x_train.shape[1]
    w = np.ones(num_features)
    train_accuracies, test_accuracies = [], []

    for _ in tqdm(range(T)):
        sample = random.randint(0, num_samples - 1)
        sample_grad = gradient_comp(x_train, y_train, lamb, sample, w)
        direction = sample_grad + 2 * lamb * w
        w = w - alpha * direction

        accuracies = compute_acc(x_train, y_train, x_test, y_test, w)
        train_acc, test_acc = accuracies
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

    return w, train_accuracies, test_accuracies
def Nesterov_88(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Run stochastic Nesterov accelerated descent and record accuracies.

    Maintains the iterate ``w`` and a look-ahead point ``v``. Each iteration
    draws one training index uniformly at random, takes a gradient step from
    ``v`` and then extrapolates with momentum coefficient ``t / (t + 3)``.

    Args:
        x_train: Training inputs; ``shape[0]`` is used as the number of
            samples and ``shape[1]`` as the length of the weight vector.
        y_train: Training targets, forwarded to the helper functions.
        x_test: Test inputs, forwarded to ``compute_acc``.
        y_test: Test targets, forwarded to ``compute_acc``.
        alpha: Step size.
        T: Number of iterations.
        lamb: Regularization weight.

    Returns:
        Tuple ``(w, train_accuracies, test_accuracies)`` where the two lists
        hold one entry per iteration, as returned by ``compute_acc``.
    """
    train_accuracies = []
    test_accuracies = []
    n = x_train.shape[0]
    p = x_train.shape[1]
    w = np.ones(p)
    v = w

    for t in tqdm(range(T)):
        w_prev = w
        i = random.randint(0, n - 1)
        # Bug fix: the gradient is evaluated at the look-ahead point v (the
        # point the step is taken from), not at the previous iterate w.
        gradient = gradient_comp(x_train, y_train, lamb, i, v) + 2.0 * lamb * v
        w = v - alpha * gradient
        # The coefficient is 0 at t = 0, so the first step is plain descent.
        v = w + (t / (t + 3.0)) * (w - w_prev)

        train_acc, test_acc = compute_acc(x_train, y_train, x_test, y_test, w)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

    return w, train_accuracies, test_accuracies
def gauss_southwell_descent(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Run greedy (Gauss-Southwell) coordinate descent and record accuracies.

    Each iteration averages ``gradient_comp`` over every training index,
    adds the ``2 * lamb * w`` term, and then updates only the coordinate
    whose gradient entry has the largest absolute value.

    Args:
        x_train: Training inputs; ``shape[0]`` is used as the number of
            samples and ``shape[1]`` as the length of the weight vector.
        y_train: Training targets, forwarded to the helper functions.
        x_test: Test inputs, forwarded to ``compute_acc``.
        y_test: Test targets, forwarded to ``compute_acc``.
        alpha: Step size.
        T: Number of iterations (one coordinate update each).
        lamb: Regularization weight.

    Returns:
        Tuple ``(w, train_accuracies, test_accuracies)`` where the two lists
        hold one entry per iteration, as returned by ``compute_acc``.
    """
    num_samples = x_train.shape[0]
    num_features = x_train.shape[1]
    w = np.ones(num_features)
    train_accuracies, test_accuracies = [], []

    for _ in tqdm(range(T)):
        # Accumulate the per-sample contributions, then average.
        accumulated = 0
        for sample in range(num_samples):
            accumulated += gradient_comp(x_train, y_train, lamb, sample, w)
        gradient = (1.0 / num_samples) * accumulated + 2 * lamb * w

        # Greedy rule: step only along the steepest coordinate.
        steepest = np.argmax(abs(gradient))
        w[steepest] -= alpha * gradient[steepest]

        accuracies = compute_acc(x_train, y_train, x_test, y_test, w)
        train_acc, test_acc = accuracies
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

    return w, train_accuracies, test_accuracies
def cyclic_coord_gradient_descent(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Run cyclic coordinate descent and record accuracies.

    Each iteration averages ``gradient_comp`` over every training index,
    adds the ``2 * lamb * w`` term, and then updates a single coordinate,
    visiting coordinates in the fixed order ``0, 1, ..., p - 1, 0, ...``.

    The original implementation updated every coordinate from the same
    gradient within one iteration, which is exactly a full gradient-descent
    step rather than a coordinate method. Updating one coordinate per
    iteration gives each iteration the same meaning as in
    ``gauss_southwell_descent`` (one coordinate update per recorded accuracy).

    Args:
        x_train: Training inputs; ``shape[0]`` is used as the number of
            samples and ``shape[1]`` as the length of the weight vector.
        y_train: Training targets, forwarded to the helper functions.
        x_test: Test inputs, forwarded to ``compute_acc``.
        y_test: Test targets, forwarded to ``compute_acc``.
        alpha: Step size.
        T: Number of iterations (one coordinate update each).
        lamb: Regularization weight.

    Returns:
        Tuple ``(w, train_accuracies, test_accuracies)`` where the two lists
        hold one entry per iteration, as returned by ``compute_acc``.
    """
    train_accuracies = []
    test_accuracies = []
    n = x_train.shape[0]
    p = x_train.shape[1]
    w = np.ones(p)

    for t in tqdm(range(T)):
        gradient = 0
        for i in range(n):
            gradient += gradient_comp(x_train, y_train, lamb, i, w)
        gradient = (1.0 / n) * gradient + 2 * lamb * w

        # Cyclic rule: iteration t touches coordinate t mod p. The guard
        # keeps the zero-feature case a no-op instead of dividing by zero.
        if p:
            k = t % p
            w[k] -= alpha * gradient[k]

        train_acc, test_acc = compute_acc(x_train, y_train, x_test, y_test, w)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

    return w, train_accuracies, test_accuracies