def least_squares_fit(xs: List[Vector],
                      ys: List[float],
                      lr: float = 10**-3,
                      num_steps: int = 1000,
                      batch_size: int = 1) -> Vector:
    """
    For the given inputs (`xs`) and outputs (`ys`), find the parameters (`beta`)
    that provide the best fit via multiple regression.

    Performs `num_steps` gradient descent steps with batch sizes of `batch_size`
    and a learning rate of `lr`.
    """
    # paired = list(zip(xs, ys))
    # random.shuffle(paired)  # shuffling seems correct and experimentally valid, but it gets
    #                         # slightly worse results than the book. Maybe the book has
    #                         # well-chosen hyperparameters?
    # xs, ys = zip(*paired)

    beta = [random.random() for _ in range(len(xs[0]))]

    for _ in tqdm.trange(num_steps, desc="least squares fit"):
        for start in range(0, len(xs), batch_size):
            # prepare a batch
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            # compute an average gradient across the batch
            grads = [sq_error_gradient(x, y, beta)
                     for x, y in zip(batch_xs, batch_ys)]
            avg_grad: Vector = vector_mean(grads)

            # update beta against the gradient (negative step because we are minimizing)
            beta = gradient_step(beta, avg_grad, -lr)

    return beta
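# `least_squares_fit` leans on a few small helpers (`sq_error_gradient`, `vector_mean`,
# `gradient_step`) that live elsewhere in this project. A minimal self-contained sketch of
# what they might look like, assuming the usual `Vector = List[float]` alias; the real
# definitions in this repo may differ.
import random
from typing import List

Vector = List[float]

def dot(v: Vector, w: Vector) -> float:
    """Sum of the componentwise products of v and w."""
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

def vector_mean(vectors: List[Vector]) -> Vector:
    """Componentwise mean of a list of equal-length vectors."""
    n = len(vectors)
    return [sum(v[i] for v in vectors) / n for i in range(len(vectors[0]))]

def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:
    """Move `step_size` along `gradient` starting from `v`."""
    return [v_i + step_size * g_i for v_i, g_i in zip(v, gradient)]

def sq_error_gradient(x: Vector, y: float, beta: Vector) -> Vector:
    """Gradient of one example's squared error (dot(x, beta) - y)**2 with respect to beta."""
    err = dot(x, beta) - y
    return [2 * err * x_i for x_i in x]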
def first_principal_component(data: List[Vector],
                              epochs: int = 1000,
                              step_size: float = .1) -> Vector:
    """
    Given a dataset, determine the first principal component using gradient descent.

    epochs = # of epochs, step_size = learning rate
    """
    # start with a random guess
    dir_w = [np.random.uniform(-1, 1) for _ in data[0]]

    with tqdm.trange(epochs) as t:
        for _ in t:
            dv = directional_variance(data, dir_w)
            gradient = dv_gradient(data, dir_w)
            dir_w = gradient_step(dir_w, gradient, step_size)
            t.set_description(f"dv: {dv:.3f}")  # nice formatting syntax: dv => 0.123

    return direction(dir_w)
def first_principal_component(data: List[Vector],
                              n: int = 100,
                              step_size: float = 0.1) -> Vector:
    # Start with an arbitrary (all-ones) initial direction
    guess = [1.0 for _ in data[0]]

    with tqdm.trange(n) as t:
        for _ in t:
            dv = directional_variance(data, guess)
            gradient = directional_variance_gradient(data, guess)
            guess = gradient_step(guess, gradient, step_size)
            t.set_description(f"dv: {dv}")

    return direction(guess)
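# Both versions above assume `directional_variance`, its gradient
# (`dv_gradient` / `directional_variance_gradient`), and `direction`, which are defined
# elsewhere in this project. A minimal sketch of what they could look like, reusing the
# `dot` and `Vector` helpers above; exact names and signatures here are assumptions.
import math
from typing import List

Vector = List[float]

def magnitude(v: Vector) -> float:
    return math.sqrt(sum(v_i ** 2 for v_i in v))

def direction(w: Vector) -> Vector:
    """Rescale w to unit length."""
    mag = magnitude(w)
    return [w_i / mag for w_i in w]

def directional_variance(data: List[Vector], w: Vector) -> float:
    """Variance of the (mean-centered) data in the direction of w."""
    w_dir = direction(w)
    return sum(dot(v, w_dir) ** 2 for v in data)

def directional_variance_gradient(data: List[Vector], w: Vector) -> Vector:
    """Gradient of the directional variance, treating w as a unit vector."""
    w_dir = direction(w)
    return [sum(2 * dot(v, w_dir) * v[i] for v in data)
            for i in range(len(w))]

dv_gradient = directional_variance_gradient  # alias used by the first version above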
def least_squares_fit(xs: List[Vector],
                      ys: Vector,
                      alpha: float,
                      learning_rate: float = 0.001,
                      num_steps: int = 1000,
                      batch_size: int = 1) -> Vector:
    """
    Finds the beta that minimizes the sum of squared errors (plus the
    alpha-weighted ridge penalty), assuming the model dot(x, beta).
    """
    # start with a random guess
    guess = [random.random() for _ in xs[0]]

    for _ in tqdm.trange(num_steps, desc="least squares fit"):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            gradient = vector_mean([sqerror_ridge_gradient(x, y, guess, alpha)
                                    for x, y in zip(batch_xs, batch_ys)])
            guess = gradient_step(guess, gradient, -learning_rate)

    return guess
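# `sqerror_ridge_gradient` adds an alpha-weighted L2 penalty (conventionally excluding the
# intercept term beta[0]) to the plain squared-error gradient. A sketch under that
# assumption; the repo's actual definition may differ.
from typing import List

Vector = List[float]

def sqerror_gradient(x: Vector, y: float, beta: Vector) -> Vector:
    """Gradient of one example's squared error with respect to beta."""
    err = sum(x_i * b_i for x_i, b_i in zip(x, beta)) - y
    return [2 * err * x_i for x_i in x]

def ridge_penalty_gradient(beta: Vector, alpha: float) -> Vector:
    """Gradient of alpha * sum(beta_j ** 2 for j >= 1); the intercept is unpenalized."""
    return [0.0] + [2 * alpha * beta_j for beta_j in beta[1:]]

def sqerror_ridge_gradient(x: Vector, y: float, beta: Vector, alpha: float) -> Vector:
    """Per-example gradient of squared error plus the ridge penalty."""
    return [g + rg for g, rg in zip(sqerror_gradient(x, y, beta),
                                    ridge_penalty_gradient(beta, alpha))]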
def logistic_regression_fit(xs: List[Vector],
                            ys: List[float],
                            lr: float = 0.01,
                            epochs: int = 5000,
                            random_seed: int = None) -> Vector:
    """
    Return the parameters beta of a logistic regression model relating xs and ys,
    fit with gradient descent for `epochs` epochs at a learning rate of `lr`.
    """
    if random_seed is not None:
        random.seed(random_seed)

    # random initial guess
    beta: Vector = [random.random() for _ in range(len(xs[0]))]

    # out of (the developer's) laziness, we do full-batch gradient descent
    # on the whole dataset instead of SGD
    with tqdm.trange(epochs) as t:
        for _ in t:
            gradient: Vector = negative_log_gradient(xs, ys, beta)
            beta = gradient_step(beta, gradient, -lr)
            loss = neg_log_likelihood(xs, ys, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    return beta
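# `negative_log_gradient` and `neg_log_likelihood` are the standard logistic-regression
# loss and gradient, summed over the dataset. A self-contained sketch; the exact names and
# signatures used in this project are assumptions.
import math
from typing import List

Vector = List[float]

def logistic(x: float) -> float:
    return 1.0 / (1.0 + math.exp(-x))

def _point_neg_log_likelihood(x: Vector, y: float, beta: Vector) -> float:
    """Negative log likelihood of a single (x, y) pair under the model logistic(dot(x, beta))."""
    p = logistic(sum(x_i * b_i for x_i, b_i in zip(x, beta)))
    return -math.log(p) if y == 1 else -math.log(1 - p)

def neg_log_likelihood(xs: List[Vector], ys: List[float], beta: Vector) -> float:
    return sum(_point_neg_log_likelihood(x, y, beta) for x, y in zip(xs, ys))

def negative_log_gradient(xs: List[Vector], ys: List[float], beta: Vector) -> Vector:
    """Gradient of the negative log likelihood with respect to beta, summed over all points."""
    grad = [0.0] * len(beta)
    for x, y in zip(xs, ys):
        p = logistic(sum(x_i * b_i for x_i, b_i in zip(x, beta)))
        for j, x_j in enumerate(x):
            grad[j] += -(y - p) * x_j
    return grad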
random.seed(0)  # for reproducibility

# training data: the XOR truth table
xs = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
ys = [[0.], [1.], [1.], [0.]]

network = [
    # hidden layer: 2 inputs -> 2 outputs
    [[random.random() for _ in range(2 + 1)],   # 1st hidden neuron
     [random.random() for _ in range(2 + 1)]],  # 2nd hidden neuron
    # output layer: 2 inputs -> 1 output
    [[random.random() for _ in range(2 + 1)]]   # 1st output neuron
]

learning_rate = 1.0

for epoch in tqdm.trange(20000, desc="neural net for xor"):
    for x, y in zip(xs, ys):
        gradients = sqerror_gradients(network, x, y)

        # Take a gradient step for each neuron in each layer
        network = [[gradient_step(neuron, grad, -learning_rate)
                    for neuron, grad in zip(layer, layer_grad)]
                   for layer, layer_grad in zip(network, gradients)]

# check that it learned XOR
assert feed_forward(network, [0, 0])[-1][0] < 0.01
assert feed_forward(network, [0, 1])[-1][0] > 0.99
assert feed_forward(network, [1, 0])[-1][0] > 0.99
assert feed_forward(network, [1, 1])[-1][0] < 0.01

pprint(network)
# [  # hidden layer
#  [[7, 7, -3],      # computes OR
#   [5, 5, -8]],     # computes AND
#  # output layer
#  [[11, -12, -5]]   # computes "first but not second"
# ]
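# `feed_forward` and `sqerror_gradients` are defined elsewhere (the neural-network module).
# A sketch of how they could be implemented for this one-hidden-layer sigmoid network;
# names and the exact loss scaling (the factor of 2 is absorbed into the learning rate)
# are assumptions, not necessarily what this project uses.
import math
from typing import List

Vector = List[float]

def sigmoid(t: float) -> float:
    return 1.0 / (1.0 + math.exp(-t))

def neuron_output(weights: Vector, inputs: Vector) -> float:
    """A neuron's weights include the bias as the last entry."""
    return sigmoid(sum(w * x for w, x in zip(weights, inputs)))

def feed_forward(network: List[List[Vector]], input_vector: Vector) -> List[Vector]:
    """Feed the input through the network; return the outputs of every layer."""
    outputs = []
    for layer in network:
        input_with_bias = input_vector + [1]              # append a bias input
        output = [neuron_output(neuron, input_with_bias)  # one output per neuron
                  for neuron in layer]
        outputs.append(output)
        input_vector = output                             # feed into the next layer
    return outputs

def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """Gradients of the squared-error loss with respect to every weight (one hidden layer)."""
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # output-layer deltas: d(loss)/d(pre-activation) for each output neuron
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, target_vector)]
    output_grads = [[output_deltas[i] * hidden_output
                     for hidden_output in hidden_outputs + [1]]
                    for i, _ in enumerate(network[-1])]

    # hidden-layer deltas, backpropagated through the output weights
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                     sum(output_deltas[k] * n[i] for k, n in enumerate(network[-1]))
                     for i, hidden_output in enumerate(hidden_outputs)]
    hidden_grads = [[hidden_deltas[i] * inp for inp in input_vector + [1]]
                    for i, _ in enumerate(network[0])]

    return [hidden_grads, output_grads]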
from gradient_descent import gradient_step

num_epochs = 10000
random.seed(0)

guess = [random.random(), random.random()]  # initial guess for [alpha, beta]
learning_rate = 0.00001

with tqdm.trange(num_epochs) as t:
    for _ in t:
        alpha, beta = guess  # unpack the current guess

        # partial derivative of loss wrt alpha
        grad_a = sum(2 * error(alpha, beta, x_i, y_i)
                     for x_i, y_i in zip(num_friends_good, daily_minutes_good))

        # partial derivative of loss wrt beta
        grad_b = sum(2 * error(alpha, beta, x_i, y_i) * x_i
                     for x_i, y_i in zip(num_friends_good, daily_minutes_good))

        # compute the loss to stick in the tqdm description
        loss = sum_of_sqerrors(alpha, beta, num_friends_good, daily_minutes_good)
        t.set_description(f"loss: {loss:.3f}")

        # finally, update the guess
        guess = gradient_step(guess, [grad_a, grad_b], -learning_rate)

guess
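# The `error` and `sum_of_sqerrors` used here are the simple-linear-regression helpers
# (prediction = alpha + beta * x). A minimal sketch of what they might look like:
from typing import List

def predict(alpha: float, beta: float, x_i: float) -> float:
    return beta * x_i + alpha

def error(alpha: float, beta: float, x_i: float, y_i: float) -> float:
    """Prediction error for a single point: predicted minus actual."""
    return predict(alpha, beta, x_i) - y_i

def sum_of_sqerrors(alpha: float, beta: float, x: List[float], y: List[float]) -> float:
    return sum(error(alpha, beta, x_i, y_i) ** 2
               for x_i, y_i in zip(x, y))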
from machine_learning import train_test_split
import random
import tqdm

random.seed(0)
x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

learning_rate = 0.01

# Pick a random starting point
beta = [random.random() for _ in range(3)]

with tqdm.trange(5000) as t:
    for epoch in t:
        gradient = negative_log_gradient(x_train, y_train, beta)
        beta = gradient_step(beta, gradient, -learning_rate)
        loss = negative_log_likelihood(x_train, y_train, beta)
        t.set_description(f"loss: {loss:.3f} beta: {beta}")

# beta holds the coefficients for the rescaled data;
# convert them back into coefficients for the original data
from working_with_data import scale

means, stdevs = scale(xs)
beta_unscaled = [(beta[0]
                  - beta[1] * means[1] / stdevs[1]
                  - beta[2] * means[2] / stdevs[2]),
                 beta[1] / stdevs[1],
                 beta[2] / stdevs[2]]
beta_unscaled
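# Why the unscaling works: for a row x = [1, x1, x2] with rescaled features
# (x_i - mean_i) / stdev_i,
#   dot(rescaled_x, beta) = beta[0] + beta[1]*(x1 - m1)/s1 + beta[2]*(x2 - m2)/s2
#                         = (beta[0] - beta[1]*m1/s1 - beta[2]*m2/s2)
#                           + (beta[1]/s1)*x1 + (beta[2]/s2)*x2
# which is exactly dot(x, beta_unscaled). A quick sanity check, assuming the constant
# bias column is left unchanged by the rescaling (a sketch, not part of the original code):
def dot(v, w):
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

for x, rescaled_x in zip(xs, rescaled_xs):
    assert abs(dot(x, beta_unscaled) - dot(rescaled_x, beta)) < 1e-6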
def multiple_r_squared(xs: List[Vector], ys: Vector, beta: Vector) -> float:
    sum_of_squared_errors = sum(error(x, y, beta) ** 2
                                for x, y in zip(xs, ys))
    return 1.0 - sum_of_squared_errors / total_sum_of_squares(ys)

print(multiple_r_squared(inputs, daily_minutes_good, beta))

def least_squares_fit(xs: List[Vector],
                      ys: List[float],
                      learning_rate: float = 0.001,
                      num_steps: int = 1000,
                      batch_size: int = 1) -> Vector:
    """Find the beta that minimizes the sum of squared errors,
    assuming the model y = dot(x, beta)."""
    # start with a random guess
    guess = [random.random() for _ in xs[0]]

    for _ in tqdm.trange(num_steps, desc="least squares fit"):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            gradient = vector_mean([sqerror_gradient(x, y, guess)
                                    for x, y in zip(batch_xs, batch_ys)])
            guess = gradient_step(guess, gradient, -learning_rate)

    return guess

num_friends = [100.0,49,41,40,25,21,21,19,19,18,18,16,15,15,15,15,14,14,13,13,13,13,12,12,11,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,8,8,8,8,8,8,8,8,8,8,8,8,8,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]

daily_minutes = [1,68.77,51.25,52.08,38.36,44.54,57.13,51.4,41.42,31.22,34.76,54.01,38.79,47.59,49.1,27.66,41.03,36.73,48.65,28.12,46.62,35.57,32.98,35,26.07,23.77,39.73,40.57,31.65,31.21,36.32,20.45,21.93,26.02,27.34,23.49,46.94,30.5,33.8,24.23,21.4,27.94,32.24,40.57,25.07,19.42,22.39,18.42,46.96,23.72,26.41,26.97,36.76,40.32,35.02,29.47,30.2,31,38.11,38.18,36.31,21.03,30.86,36.07,28.66,29.08,37.28,15.28,24.17,22.31,30.17,25.53,19.85,35.37,44.6,17.23,13.47,26.33,35.02,32.09,24.81,19.33,28.77,24.26,31.98,25.73,24.86,16.28,34.51,15.23,39.72,40.8,26.06,35.76,34.76,16.13,44.04,18.03,19.65,32.62,35.59,39.43,14.18,35.24,40.13,41.82,35.45,36.07,43.67,24.61,20.9,21.9,18.79,27.61,27.21,26.61,29.77,20.59,27.53,13.82,33.2,25,33.1,36.65,18.63,14.87,22.2,36.81,25.53,24.62,26.25,18.21,28.08,19.42,29.79,32.8,35.99,28.32,27.79,35.88,29.06,36.28,14.1,36.63,37.49,26.9,18.58,38.48,24.48,18.95,33.55,14.24,29.04,32.51,25.63,22.22,19,32.73,15.16,13.9,27.2,32.01,29.27,33,13.74,20.42,27.32,18.23,35.35,28.48,9.08,24.62,20.12,35.26,19.92,31.02,16.49,12.16,30.7,31.22,34.65,13.13,27.51,33.2,31.57,14.1,33.42,17.44,10.12,24.42,9.82,23.39,30.93,15.03,21.67,31.09,33.29,22.61,26.89,23.48,8.38,27.81,32.35,23.84]

daily_hours = [dm / 60 for dm in daily_minutes]

outlier = num_friends.index(100)  # index of the outlier

num_friends_good = [x for i, x in enumerate(num_friends) if i != outlier]
daily_minutes_good = [x for i, x in enumerate(daily_minutes) if i != outlier]
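# `multiple_r_squared` above also relies on `total_sum_of_squares` and a multiple-regression
# `error(x, y, beta)`, both defined elsewhere in this project. A minimal sketch of what they
# might look like; names and signatures here are assumptions.
from typing import List

Vector = List[float]

def mr_error(x: Vector, y: float, beta: Vector) -> float:
    """Multiple-regression residual: dot(x, beta) - y."""
    return sum(x_i * b_i for x_i, b_i in zip(x, beta)) - y

def de_mean(ys: Vector) -> Vector:
    mean = sum(ys) / len(ys)
    return [y - mean for y in ys]

def total_sum_of_squares(ys: Vector) -> float:
    """Total squared variation of the y_i's around their mean."""
    return sum(v ** 2 for v in de_mean(ys))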