# Example 1
def heavy_ball_descent(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Stochastic heavy-ball (Polyak momentum) descent with l2 regularization.

    Runs T stochastic updates, each using the gradient of a single random
    training sample plus the l2 penalty term, with a momentum correction
    based on the previous iterate.

    Args:
        x_train: training design matrix, shape (n, p).
        y_train: training labels, length n.
        x_test: test design matrix.
        y_test: test labels.
        alpha: ignored — recomputed below from the Lipschitz/strong-convexity
            estimates (kept in the signature for interface compatibility).
        T: number of iterations.
        lamb: l2 regularization coefficient.

    Returns:
        (w, train_accuracies, test_accuracies): final weights and the
        per-iteration accuracy histories.
    """
    train_accuracies = []
    test_accuracies = []

    n = x_train.shape[0]
    p = x_train.shape[1]
    w = np.ones(p)
    w_prev = w

    # BUG FIX: the original passed the undefined name `lamda` here, which
    # raised a NameError; the regularization parameter is `lamb`.
    L, mu = get_lipschitz_mu(x_train, y_train, lamb, w)
    # Classical heavy-ball tuning for an L-smooth, mu-strongly-convex
    # objective, damped by a 0.8 safety factor. This deliberately
    # overrides the `alpha` argument.
    alpha = 0.8 * 4.0 / (math.sqrt(L) + math.sqrt(mu))**2
    beta = 0.8 * (math.sqrt(L) - math.sqrt(mu))**2 / (math.sqrt(L) +
                                                      math.sqrt(mu))**2

    for t in tqdm(range(0, T)):
        w_before = w
        i = random.randint(0, n - 1)
        # Single-sample gradient plus the gradient of lamb * ||w||^2.
        gradient = gradient_comp(x_train, y_train, lamb, i, w) + 2 * lamb * w
        w = w - alpha * gradient + beta * (w - w_prev)

        train_acc, test_acc = compute_acc(x_train, y_train, x_test, y_test, w)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

        # At t == 0, w_prev == w_before already (momentum term is zero);
        # from t >= 1 keep the pre-update iterate for the next momentum step.
        if t >= 1:
            w_prev = w_before

    return w, train_accuracies, test_accuracies
# Example 2
def FISTA_descent(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Stochastic FISTA-style accelerated descent with l2 regularization.

    Each of the T iterations takes a single-sample stochastic gradient
    (plus the l2 penalty term) and applies the FISTA momentum schedule
    driven by the eta sequence.

    Returns (w, train_accuracies, test_accuracies).
    """
    train_accuracies, test_accuracies = [], []

    n = x_train.shape[0]
    w = np.ones(x_train.shape[1])
    v = w
    u = w
    eta = 0

    for _ in tqdm(range(0, T)):
        sample = random.randint(0, n - 1)
        # Stochastic gradient at w plus the gradient of lamb * ||w||^2.
        grad = gradient_comp(x_train, y_train, lamb, sample, w) + 2.0 * lamb * w

        # FISTA momentum coefficient update.
        eta_next = (1. + math.sqrt(1. + 4. * eta**2)) / 2.
        w = v - alpha * grad
        v = w + (eta - 1.) / float(eta_next) * (w - u)
        eta = eta_next
        u = w

        tr_acc, te_acc = compute_acc(x_train, y_train, x_test, y_test, w)
        train_accuracies.append(tr_acc)
        test_accuracies.append(te_acc)

    return w, train_accuracies, test_accuracies
# Example 3
def gradient_descent(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Full-batch gradient descent with l2 regularization.

    Every iteration averages the per-sample gradients over the whole
    training set, adds the l2 penalty gradient, and takes a step of
    size alpha.

    Returns (w, train_accuracies, test_accuracies).
    """
    train_accuracies, test_accuracies = [], []

    n = x_train.shape[0]
    w = np.ones(x_train.shape[1])

    for _ in tqdm(range(0, T)):
        # Accumulate the per-sample gradients over the full training set.
        grad_sum = 0
        for sample in range(n):
            grad_sum += gradient_comp(x_train, y_train, lamb, sample, w)
        # Average plus the gradient of lamb * ||w||^2.
        full_grad = (1.0 / n) * grad_sum + 2 * lamb * w
        w = w - alpha * full_grad

        tr_acc, te_acc = compute_acc(x_train, y_train, x_test, y_test, w)
        train_accuracies.append(tr_acc)
        test_accuracies.append(te_acc)

    return w, train_accuracies, test_accuracies
# Example 4
def gradient_descent(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Stochastic gradient descent with l2 regularization.

    Each of the T iterations takes a step of size alpha along the
    gradient of a single uniformly-sampled training example, plus the
    gradient of the lamb * ||w||^2 penalty.

    Args:
        x_train: training design matrix, shape (n, p).
        y_train: training labels, length n.
        x_test: test design matrix.
        y_test: test labels.
        alpha: step size.
        T: number of iterations.
        lamb: l2 regularization coefficient.

    Returns:
        (w, train_accuracies, test_accuracies): final weights and the
        per-iteration accuracy histories.
    """
    train_accuracies = []
    test_accuracies = []

    n = x_train.shape[0]
    p = x_train.shape[1]
    w = np.ones(p)

    for t in tqdm(range(0, T)):
        # (Removed a dead `gradient = 0` assignment that was immediately
        # overwritten below.)
        i = random.randint(0, n - 1)
        gradient = gradient_comp(x_train, y_train, lamb, i, w)
        # Step along the sample gradient plus the l2 penalty gradient.
        w = w - alpha * (gradient + 2 * lamb * w)

        train_acc, test_acc = compute_acc(x_train, y_train, x_test, y_test, w)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

    return w, train_accuracies, test_accuracies
# Example 5
def Nesterov_88(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Stochastic Nesterov accelerated descent with l2 regularization.

    Uses a single-sample stochastic gradient per iteration and the
    t / (t + 3) momentum schedule applied to the iterate difference.

    Returns (w, train_accuracies, test_accuracies).
    """
    train_accuracies, test_accuracies = [], []

    n = x_train.shape[0]
    w = np.ones(x_train.shape[1])
    v = w

    for step in tqdm(range(0, T)):
        previous_w = w
        sample = random.randint(0, n - 1)
        # Stochastic gradient at w plus the gradient of lamb * ||w||^2.
        grad = gradient_comp(x_train, y_train, lamb, sample, w) + 2. * lamb * w
        w = v - alpha * grad
        # Momentum coefficient t / (t + 3).
        momentum = float(step) / (step + 3.)
        v = w + momentum * (w - previous_w)

        tr_acc, te_acc = compute_acc(x_train, y_train, x_test, y_test, w)
        train_accuracies.append(tr_acc)
        test_accuracies.append(te_acc)

    return w, train_accuracies, test_accuracies
# Example 6
def gauss_southwell_descent(x_train, y_train, x_test, y_test, alpha, T, lamb):
    """Coordinate descent with the Gauss-Southwell selection rule.

    Each iteration computes the full-batch gradient (with the l2 penalty
    term) and updates only the single coordinate with the largest
    absolute gradient entry.

    Returns (w, train_accuracies, test_accuracies).
    """
    train_accuracies, test_accuracies = [], []

    n = x_train.shape[0]
    w = np.ones(x_train.shape[1])

    for _ in tqdm(range(0, T)):
        # Full-batch averaged gradient plus the l2 penalty gradient.
        grad_sum = 0
        for sample in range(n):
            grad_sum += gradient_comp(x_train, y_train, lamb, sample, w)
        full_grad = (1.0 / n) * grad_sum + 2 * lamb * w

        # Gauss-Southwell rule: pick the coordinate with largest |gradient|.
        best = np.argmax(abs(full_grad))
        w[best] = w[best] - alpha * full_grad[best]

        tr_acc, te_acc = compute_acc(x_train, y_train, x_test, y_test, w)
        train_accuracies.append(tr_acc)
        test_accuracies.append(te_acc)

    return w, train_accuracies, test_accuracies
# Example 7
def cyclic_coord_gradient_descent(x_train, y_train, x_test, y_test, alpha, T,
                                  lamb):
    """Cyclic coordinate gradient descent with l2 regularization.

    Each iteration computes the full-batch gradient (with the l2 penalty
    term) once, then sweeps through all p coordinates in order, stepping
    each by alpha times its (fixed) gradient entry.

    Returns (w, train_accuracies, test_accuracies).
    """
    train_accuracies, test_accuracies = [], []

    n = x_train.shape[0]
    p = x_train.shape[1]
    w = np.ones(p)

    for _ in tqdm(range(0, T)):
        # Full-batch averaged gradient plus the l2 penalty gradient,
        # computed once per sweep (not per coordinate).
        grad_sum = 0
        for sample in range(n):
            grad_sum += gradient_comp(x_train, y_train, lamb, sample, w)
        full_grad = (1.0 / n) * grad_sum + 2 * lamb * w

        # Cyclic sweep over every coordinate with the same gradient vector.
        for coord in range(p):
            w[coord] = w[coord] - alpha * full_grad[coord]

        tr_acc, te_acc = compute_acc(x_train, y_train, x_test, y_test, w)
        train_accuracies.append(tr_acc)
        test_accuracies.append(te_acc)

    return w, train_accuracies, test_accuracies