Example #1
def least_squares_fit(xs: List[Vector],
                      ys: List[float],
                      lr: float = 10**-3,
                      num_steps: int = 1000,
                      batch_size: int = 1) -> Vector:
    """
    For the given inputs (`xs`) and outputs (`ys`), find the parameters (`beta`) that provide the best fit 
    via multiple regression. Performs `num_steps` gradient descent steps with batch sizes of `batch_size` 
    and a learning rate of `lr`.
    """
    # paired = list(zip(xs, ys))
    # random.shuffle(paired)
    # xs, ys = zip(*paired)
    # Shuffling seems correct and experimentally valid, but gives slightly worse results
    # than the book; maybe the book's hyperparameters were tuned for unshuffled data?
    beta = [random.random() for _ in xs[0]]  # start with a random guess
    for _ in tqdm.trange(num_steps, desc="least squares fit"):
        for start in range(0, len(xs), batch_size):
            # prepare a batch
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]
            # compute an average gradient across the batch
            grads = [
                sq_error_gradient(x, y, beta)
                for (x, y) in zip(batch_xs, batch_ys)
            ]
            avg_grad: Vector = vector_mean(grads)
            # update beta according to the gradient
            beta = gradient_step(beta, avg_grad, -lr)  # negative step because we are minimizing
    return beta
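A minimal usage sketch for the function above (the toy data and numbers are made up; `Vector` and the helpers `sq_error_gradient`, `vector_mean`, and `gradient_step` are assumed to be the book-style versions used in the code):

random.seed(0)
toy_xs = [[1.0, float(i)] for i in range(10)]      # constant term plus one feature
toy_ys = [3.0 + 2.0 * x[1] for x in toy_xs]        # exactly y = 3 + 2x, no noise
beta_hat = least_squares_fit(toy_xs, toy_ys, lr=0.001, num_steps=5000)
# beta_hat should end up close to [3.0, 2.0]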
Example #2
def first_principal_component(data: List[Vector], epochs: int = 1000, step_size: float = .1) -> Vector:
    """
    Given a dataset, determine the first principle component using gradient descent
    epochs = # of epochs, step_size = learning rate
    """
    # start with a random guess
    dir_w = [np.random.uniform(-1, 1) for _ in data[0]]
    with tqdm.trange(epochs) as t:
        for _ in t:
            dv = directional_variance(data, dir_w)
            gradient = dv_gradient(data, dir_w)
            dir_w = gradient_step(dir_w, gradient, step_size)
            t.set_description(f"dv: {dv:.3f}")  # the .3f format spec renders dv like 0.123
    return direction(dir_w)
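An illustrative call on tiny made-up data (nothing below is from the source; the columns are centered first because the directional-variance computation assumes zero-mean data):

raw = [[20.0, 5.0], [12.0, 3.0], [4.0, 1.0]]
col_means = [sum(col) / len(col) for col in zip(*raw)]
centered = [[x - m for x, m in zip(row, col_means)] for row in raw]
component = first_principal_component(centered, epochs=200, step_size=0.1)
# the centered points lie on a line through the origin along [8, 2], so `component`
# should come out (up to sign) roughly equal to [8, 2] normalized, i.e. ~[0.97, 0.24]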
Example #3
def first_principal_component(data: List[Vector],
                              n: int = 100,
                              step_size: float = 0.1) -> Vector:
    # Start with an arbitrary all-ones guess at the direction
    guess = [1.0 for _ in data[0]]

    with tqdm.trange(n) as t:
        for _ in t:
            dv = directional_variance(data, guess)
            gradient = directional_variance_gradient(data, guess)
            guess = gradient_step(guess, gradient, step_size)
            t.set_description(f"dv: {dv}")

    return direction(guess)
def least_squares_fit(xs: List[Vector],
                      ys: Vector,
                      alpha: float,
                      learning_rate: float = 0.001,
                      num_steps: int = 1000,
                      batch_size: int = 1) -> Vector:
    """Finds the beta that minimizes the sum of squared errors plus an
    alpha-weighted ridge penalty, assuming the model y = dot(x, beta)."""
    # start with a random guess
    guess = [random.random() for _ in xs[0]]
    
    for _ in tqdm.trange(num_steps, desc="least squares fit"):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            gradient = vector_mean([sqerror_ridge_gradient(x, y, guess, alpha)
                                    for x, y in zip(batch_xs, batch_ys)])

            guess = gradient_step(guess, gradient, -learning_rate)
    return guess
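A sketch of how one might see the effect of the ridge penalty (the perfectly collinear toy data below is fabricated; the helpers are assumed to be the book-style ones used above):

random.seed(0)
toy_xs = [[1.0, float(i), float(i)] for i in range(10)]   # two identical features
toy_ys = [1.0 + 4.0 * x[1] for x in toy_xs]
beta_no_penalty = least_squares_fit(toy_xs, toy_ys, alpha=0.0)
beta_ridge = least_squares_fit(toy_xs, toy_ys, alpha=1.0)
# relative to alpha=0.0, the larger penalty should shrink the non-intercept
# coefficients toward zero (the book-style ridge penalty leaves beta[0] unpenalized)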
Example #5
def logistic_regression_fit(xs: List[Vector],
                            ys: List[float],
                            lr: float = 0.01,
                            epochs: int = 5000,
                            random_seed: Optional[int] = None) -> Vector:
    """
    Return the parameters beta of a logistic regression model relating xs to ys,
    fit with gradient descent for `epochs` epochs at a learning rate of `lr`.
    """
    if random_seed is not None:
        random.seed(random_seed)
    # random initial guess
    beta: Vector = [random.random() for _ in range(len(xs[0]))]
    # out of laziness (of the developer), we'll just do gradient descent on the whole dataset instead of SGD
    with tqdm.trange(epochs) as t:
        for _ in t:
            gradient: Vector = negative_log_gradient(xs, ys, beta)
            beta = gradient_step(beta, gradient, -lr)
            loss = neg_log_likelihood(xs, ys, beta)
            t.set_description(f"loss {loss:.3f} beta: {beta})")
    return beta
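A hypothetical call on a tiny made-up dataset (assumes the `negative_log_gradient` and `neg_log_likelihood` helpers used above are in scope):

toy_xs = [[1.0, 0.5], [1.0, 1.5], [1.0, 2.5], [1.0, 3.5]]   # constant term plus one feature
toy_ys = [0.0, 0.0, 1.0, 1.0]
beta_hat = logistic_regression_fit(toy_xs, toy_ys, lr=0.1, epochs=2000, random_seed=0)
# the book-style logistic(dot(x, beta_hat)) can then be read as P(y = 1 | x)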
Example #6
    # training data for XOR
    xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    ys = [[0.], [1.], [1.], [0.]]

    # start with random weights
    network = [
        # hidden layer: 2 inputs -> 2 outputs
        [
            [random.random() for _ in range(2 + 1)],  # 1st hidden neuron
            [random.random() for _ in range(2 + 1)]   # 2nd hidden neuron
        ],
        # output layer: 2 inputs -> 1 output
        [[random.random() for _ in range(2 + 1)]]  # 1st output neuron
    ]

    learning_rate = 1.0

    for epoch in tqdm.trange(20000, desc="neural net for xor"):
        for x, y in zip(xs, ys):
            gradients = sqerror_gradients(network, x, y)

            # Take a gradient step for each neuron in each layer
            network = [[
                gradient_step(neuron, grad, -learning_rate)
                for neuron, grad in zip(layer, layer_grad)
            ] for layer, layer_grad in zip(network, gradients)]

    # check that it learned XOR
    assert feed_forward(network, [0, 0])[-1][0] < 0.01
    assert feed_forward(network, [0, 1])[-1][0] > 0.99
    assert feed_forward(network, [1, 0])[-1][0] > 0.99
    assert feed_forward(network, [1, 1])[-1][0] < 0.01

    pprint(network)
    # [  # hidden layer
    #     [[7, 7, -3],  # computes OR
    #      [5, 5, -8]],  # computes AND
    #     # output layer
    #     [[11, -12, -5]]  # computes "first but not second"
    # ]
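A quick, self-contained check of the interpretation in the comment above (a sketch: the `sigmoid` below is the standard logistic function, which the network's neurons are assumed to apply to dot(weights, inputs + [1])):

import math

def sigmoid(t: float) -> float:
    return 1 / (1 + math.exp(-t))

# "computes OR": weights [7, 7, -3] on inputs (x1, x2, 1)
assert sigmoid(7 * 0 + 7 * 0 - 3) < 0.1   # (0, 0) -> near 0
assert sigmoid(7 * 1 + 7 * 0 - 3) > 0.9   # (1, 0) -> near 1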
from gradient_descent import gradient_step

num_epochs = 10000
random.seed(0)

guess = [random.random(), random.random()]

learning_rate = 0.00001

with tqdm.trange(num_epochs) as t:
    for _ in t:
        alpha, beta = guess  # unpack the current guess

        # partial derivative of loss wrt alpha
        grad_a = sum(2 * error(alpha, beta, x_i, y_i)
                     for x_i, y_i in zip(num_friends_good, daily_minutes_good))

        # partial derivative of loss wrt beta
        grad_b = sum(2 * error(alpha, beta, x_i, y_i) * x_i
                     for x_i, y_i in zip(num_friends_good, daily_minutes_good))

        # compute the loss to show in the tqdm description
        loss = sum_of_sqerrors(alpha, beta, num_friends_good,
                               daily_minutes_good)
        t.set_description(f"loss: {loss:.3f}")

        # Finally update the guess
        guess = gradient_step(guess, [grad_a, grad_b], -learning_rate)

guess
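The loop above relies on `error` and `sum_of_sqerrors`, which are not shown here; a minimal sketch consistent with the gradients used above (where the error is predicted minus actual):

def predict(alpha: float, beta: float, x_i: float) -> float:
    return beta * x_i + alpha

def error(alpha: float, beta: float, x_i: float, y_i: float) -> float:
    return predict(alpha, beta, x_i) - y_i

def sum_of_sqerrors(alpha: float, beta: float, xs: List[float], ys: List[float]) -> float:
    return sum(error(alpha, beta, x_i, y_i) ** 2 for x_i, y_i in zip(xs, ys))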
from machine_learning import train_test_split
import random
import tqdm

random.seed(0)
x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

learning_rate = 0.01

# Pick a random starting point
beta = [random.random() for _ in range(3)]

with tqdm.trange(5000) as t:
    for epoch in t:
        gradient = negative_log_gradient(x_train, y_train, beta)
        beta = gradient_step(beta, gradient, -learning_rate)
        loss = negative_log_likelihood(x_train, y_train, beta)
        t.set_description(f"loss:{loss:.3f} beta: {beta}")

# Coefficients with rescaled data
# Convert coefficients of rescaled data to the original data
from working_with_data import scale

means, stdevs = scale(xs)
beta_unscaled = [(beta[0]
                  - beta[1] * means[1] / stdevs[1]
                  - beta[2] * means[2] / stdevs[2]),
                 beta[1] / stdevs[1],
                 beta[2] / stdevs[2]]
beta_unscaled
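To see why the conversion takes this form, expand the scaled model (a sketch assuming column 0 of xs is the constant 1 and that rescaling maps x_j to (x_j - means[j]) / stdevs[j]):

# dot(x_scaled, beta)
#   = beta[0]
#     + beta[1] * (x1 - means[1]) / stdevs[1]
#     + beta[2] * (x2 - means[2]) / stdevs[2]
#   = (beta[0] - beta[1] * means[1] / stdevs[1] - beta[2] * means[2] / stdevs[2])
#     + (beta[1] / stdevs[1]) * x1
#     + (beta[2] / stdevs[2]) * x2
# so only the intercept picks up the -beta[j] * means[j] / stdevs[j] corrections;
# the slope terms are simply beta[j] / stdevs[j].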
def multiple_r_squared(xs: List[Vector], ys: Vector, beta: Vector) -> float:
    sum_of_squared_errors = sum(error(x, y, beta) ** 2
                                for x, y in zip(xs, ys))
    return 1.0 - sum_of_squared_errors / total_sum_of_squares(ys)

print(multiple_r_squared(inputs, daily_minutes_good, beta))
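`multiple_r_squared` also assumes a `total_sum_of_squares` helper; a sketch consistent with the R-squared formula above (total variation of y around its mean):

def total_sum_of_squares(ys: Vector) -> float:
    mean_y = sum(ys) / len(ys)
    return sum((y - mean_y) ** 2 for y in ys)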
    
def least_squares_fit(xs: List[Vector],
                      ys: List[float],
                      learning_rate: float = 0.001,
                      num_steps: int = 1000,
                      batch_size: int = 1) -> Vector:
    """Find the beta that minimizes the sum of squared errors,
    assuming the model y = dot(x, beta)."""
    # start with a random guess
    guess = [random.random() for _ in xs[0]]

    for _ in tqdm.trange(num_steps, desc="least squares fit"):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            gradient = vector_mean([sqerror_gradient(x, y, guess)
                                    for x, y in zip(batch_xs, batch_ys)])

            guess = gradient_step(guess, gradient, -learning_rate)
    return guess
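This version uses `sqerror_gradient`, which isn't shown here; a self-contained sketch consistent with the model y ≈ dot(x, beta):

def sqerror_gradient(x: Vector, y: float, beta: Vector) -> Vector:
    err = sum(x_i * b_i for x_i, b_i in zip(x, beta)) - y   # dot(x, beta) - y
    return [2 * err * x_i for x_i in x]                     # gradient of err**2 w.r.t. beta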

num_friends = [100.0,49,41,40,25,21,21,19,19,18,18,16,15,15,15,15,14,14,13,13,13,13,12,12,11,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,8,8,8,8,8,8,8,8,8,8,8,8,8,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
daily_minutes = [1,68.77,51.25,52.08,38.36,44.54,57.13,51.4,41.42,31.22,34.76,54.01,38.79,47.59,49.1,27.66,41.03,36.73,48.65,28.12,46.62,35.57,32.98,35,26.07,23.77,39.73,40.57,31.65,31.21,36.32,20.45,21.93,26.02,27.34,23.49,46.94,30.5,33.8,24.23,21.4,27.94,32.24,40.57,25.07,19.42,22.39,18.42,46.96,23.72,26.41,26.97,36.76,40.32,35.02,29.47,30.2,31,38.11,38.18,36.31,21.03,30.86,36.07,28.66,29.08,37.28,15.28,24.17,22.31,30.17,25.53,19.85,35.37,44.6,17.23,13.47,26.33,35.02,32.09,24.81,19.33,28.77,24.26,31.98,25.73,24.86,16.28,34.51,15.23,39.72,40.8,26.06,35.76,34.76,16.13,44.04,18.03,19.65,32.62,35.59,39.43,14.18,35.24,40.13,41.82,35.45,36.07,43.67,24.61,20.9,21.9,18.79,27.61,27.21,26.61,29.77,20.59,27.53,13.82,33.2,25,33.1,36.65,18.63,14.87,22.2,36.81,25.53,24.62,26.25,18.21,28.08,19.42,29.79,32.8,35.99,28.32,27.79,35.88,29.06,36.28,14.1,36.63,37.49,26.9,18.58,38.48,24.48,18.95,33.55,14.24,29.04,32.51,25.63,22.22,19,32.73,15.16,13.9,27.2,32.01,29.27,33,13.74,20.42,27.32,18.23,35.35,28.48,9.08,24.62,20.12,35.26,19.92,31.02,16.49,12.16,30.7,31.22,34.65,13.13,27.51,33.2,31.57,14.1,33.42,17.44,10.12,24.42,9.82,23.39,30.93,15.03,21.67,31.09,33.29,22.61,26.89,23.48,8.38,27.81,32.35,23.84]

daily_hours = [dm / 60 for dm in daily_minutes]

outlier = num_friends.index(100)    # index of outlier
num_friends_good = [x
                    for i, x in enumerate(num_friends)
                    if i != outlier]

daily_minutes_good = [x
                      for i, x in enumerate(daily_minutes)
                      if i != outlier]
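A couple of quick sanity checks on the cleaned lists (illustrative only):

assert len(num_friends_good) == len(num_friends) - 1
assert len(daily_minutes_good) == len(daily_minutes) - 1
assert max(num_friends_good) == 49   # the 100-friend outlier is gone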