def cluster_distance(cluster1: Cluster,
                     cluster2: Cluster,
                     distance_agg: Callable = min) -> float:
    """
    Aggregate the pairwise distances between two clusters.

    Every value of cluster1 is paired with every value of cluster2, the
    distance of each pair is collected into a list, and distance_agg
    (min by default) is applied to that list.
    """
    pairwise_distances = []
    for v1 in get_values(cluster1):
        # get_values(cluster2) is evaluated once per v1, as in a nested
        # comprehension.
        for v2 in get_values(cluster2):
            pairwise_distances.append(distance(v1, v2))
    return distance_agg(pairwise_distances)
def knn_classify(k: int,
                 labeled_points: List[LabeledPoint],
                 new_point: Vector) -> str:
    """
    Classify new_point from the labels of its k closest labeled points.

    The labeled points are ordered by their distance to new_point, the
    labels of the first k are collected, and majority_vote picks the result.
    """
    def distance_to_new_point(candidate):
        # Sort key: how far this labeled point lies from the query point.
        return distance(candidate.point, new_point)

    # Nearest first, farthest last.
    ranked = sorted(labeled_points, key=distance_to_new_point)

    # Labels of the k closest points become the votes.
    votes = [neighbor.label for neighbor in ranked[:k]]
    return majority_vote(votes)
def find_eigenvector(m: Matrix,
                     tolerance: float = 0.00001) -> Tuple[Vector, float]:
    """
    Iterate from a random starting vector until successive guesses agree.

    Each round multiplies the current guess by m and divides the result by
    its magnitude. When the rescaled vector is within tolerance of the
    previous guess, it is returned together with that magnitude as
    (eigenvector, eigenvalue). The loop has no iteration cap: it runs until
    the convergence test succeeds.
    """
    # One random component per row of m.
    current = [random.random() for _ in m]

    while True:
        transformed = matrix_times_vector(m, current)   # apply m
        length = magnitude(transformed)                 # size of the result
        rescaled = [component / length for component in transformed]

        if distance(current, rescaled) < tolerance:
            # Converged: the guess no longer moves appreciably.
            return rescaled, length

        current = rescaled
def random_distances(dim: int, num_pairs: int) -> List[float]:
    """
    Return num_pairs distances, each measured between two fresh points
    obtained from random_point(dim).
    """
    distances = []
    for _ in range(num_pairs):
        # Draw the two endpoints in order, then measure between them.
        first = random_point(dim)
        second = random_point(dim)
        distances.append(distance(first, second))
    return distances
def main():
    """
    Run the gradient-descent demonstrations in sequence.

    Plots exact versus estimated derivatives of ``square``, drives a random
    3-element vector toward the origin with gradient steps, then fits a
    slope and intercept to ``inputs`` three ways: full-batch, minibatch,
    and one-point-at-a-time gradient descent. Progress is printed once per
    epoch and each stage ends with assertions on the result.
    """
    sample_xs = range(-10, 11)
    exact = [derivative(x) for x in sample_xs]
    approx = [difference_quotient(square, x, h=0.001) for x in sample_xs]

    # Draw both series on one chart; they should nearly coincide.
    import matplotlib.pyplot as plt
    plt.title("Actual Derivatives vs. Estimates")
    plt.plot(sample_xs, exact, 'rx', label='Actual')       # red x markers
    plt.plot(sample_xs, approx, 'b+', label='Estimate')    # blue + markers
    plt.legend(loc=9)
    # plt.show() is left disabled; the figure is closed without display.
    plt.close()

    def partial_difference_quotient(f: Callable[[Vector], float],
                                    v: Vector,
                                    i: int,
                                    h: float) -> float:
        """Returns the i-th partial difference quotient of f at v"""
        shifted = []
        for j, v_j in enumerate(v):
            # Only the i-th coordinate is nudged by h; the rest get + 0.
            bump = h if j == i else 0
            shifted.append(v_j + bump)
        return (f(shifted) - f(v)) / h

    # "Using the Gradient" example: start somewhere random and walk downhill.
    point = [random.uniform(-10, 10) for _ in range(3)]

    for epoch in range(1000):
        gradient = sum_of_squares_gradient(point)       # gradient at point
        point = gradient_step(point, gradient, -0.01)   # negative step
        print(epoch, point)

    assert distance(point, [0, 0, 0]) < 0.001    # should end close to 0

    from scratch.linear_algebra import vector_mean

    def random_theta():
        """Draw a fresh two-element starting guess, each value in [-1, 1]."""
        first = random.uniform(-1, 1)
        second = random.uniform(-1, 1)
        return [first, second]

    def check_fit(fitted):
        """Assert the fitted pair is close to slope 20 and intercept 5."""
        slope, intercept = fitted
        assert 19.9 < slope < 20.1, "slope should be about 20"
        assert 4.9 < intercept < 5.1, "intercept should be about 5"

    # First "Using Gradient Descent to Fit Models" example: every epoch
    # averages the gradient over all of inputs before stepping.
    theta = random_theta()
    learning_rate = 0.001

    for epoch in range(5000):
        per_point = [linear_gradient(x, y, theta) for x, y in inputs]
        mean_gradient = vector_mean(per_point)
        theta = gradient_step(theta, mean_gradient, -learning_rate)
        print(epoch, theta)

    check_fit(theta)

    # Minibatch gradient descent example: one step per batch of 20.
    theta = random_theta()

    for epoch in range(1000):
        for batch in minibatches(inputs, batch_size=20):
            per_point = [linear_gradient(x, y, theta) for x, y in batch]
            mean_gradient = vector_mean(per_point)
            theta = gradient_step(theta, mean_gradient, -learning_rate)
        print(epoch, theta)

    check_fit(theta)

    # Stochastic gradient descent example: one step per data point.
    theta = random_theta()

    for epoch in range(100):
        for x, y in inputs:
            point_gradient = linear_gradient(x, y, theta)
            theta = gradient_step(theta, point_gradient, -learning_rate)
        print(epoch, theta)

    check_fit(theta)
def main():
    """
    Run the gradient-descent demonstrations in sequence.

    Plots exact versus estimated derivatives of ``square``, drives a random
    3-element vector toward the origin with gradient steps, then fits a
    slope and intercept to ``inputs`` three ways: full-batch, minibatch,
    and one-point-at-a-time gradient descent. Progress is printed once per
    epoch and each stage ends with assertions on the result.
    """
    sample_xs = range(-10, 11)
    exact = [derivative(x) for x in sample_xs]
    approx = [difference_quotient(square, x, h=0.001) for x in sample_xs]

    # Draw both series on one chart; they should nearly coincide.
    import matplotlib.pyplot as plt
    plt.title("Actual Derivatives vs. Estimates")
    plt.plot(sample_xs, exact, 'rx', label='Actual')       # red x markers
    plt.plot(sample_xs, approx, 'b+', label='Estimate')    # blue + markers
    plt.legend(loc=9)
    # plt.show() is left disabled; the figure is closed without display.
    plt.close()

    def partial_difference_quotient(f: Callable[[Vector], float],
                                    v: Vector,
                                    i: int,
                                    h: float) -> float:
        """Returns the i-th partial difference quotient of f at v"""
        shifted = []
        for j, v_j in enumerate(v):
            # Only the i-th coordinate is nudged by h; the rest get + 0.
            bump = h if j == i else 0
            shifted.append(v_j + bump)
        return (f(shifted) - f(v)) / h

    # "Using the Gradient" example: start somewhere random and walk downhill.
    point = [random.uniform(-10, 10) for _ in range(3)]

    for epoch in range(1000):
        gradient = sum_of_squares_gradient(point)       # gradient at point
        point = gradient_step(point, gradient, -0.01)   # negative step
        print(epoch, point)

    assert distance(point, [0, 0, 0]) < 0.001    # should end close to 0

    from scratch.linear_algebra import vector_mean

    def random_theta():
        """Draw a fresh two-element starting guess, each value in [-1, 1]."""
        first = random.uniform(-1, 1)
        second = random.uniform(-1, 1)
        return [first, second]

    def check_fit(fitted):
        """Assert the fitted pair is close to slope 20 and intercept 5."""
        slope, intercept = fitted
        assert 19.9 < slope < 20.1, "slope should be about 20"
        assert 4.9 < intercept < 5.1, "intercept should be about 5"

    # First "Using Gradient Descent to Fit Models" example: every epoch
    # averages the gradient over all of inputs before stepping.
    theta = random_theta()
    learning_rate = 0.001

    for epoch in range(5000):
        per_point = [linear_gradient(x, y, theta) for x, y in inputs]
        mean_gradient = vector_mean(per_point)
        theta = gradient_step(theta, mean_gradient, -learning_rate)
        print(epoch, theta)

    check_fit(theta)

    # Minibatch gradient descent example: one step per batch of 20.
    theta = random_theta()

    for epoch in range(1000):
        for batch in minibatches(inputs, batch_size=20):
            per_point = [linear_gradient(x, y, theta) for x, y in batch]
            mean_gradient = vector_mean(per_point)
            theta = gradient_step(theta, mean_gradient, -learning_rate)
        print(epoch, theta)

    check_fit(theta)

    # Stochastic gradient descent example: one step per data point.
    theta = random_theta()

    for epoch in range(100):
        for x, y in inputs:
            point_gradient = linear_gradient(x, y, theta)
            theta = gradient_step(theta, point_gradient, -learning_rate)
        print(epoch, theta)

    check_fit(theta)
from typing import Dict

# Bucket every daily change under its calendar month (1 through 12).
# The container is a dict keyed by month number, so it is annotated as one.
changes_by_month: Dict[int, List[DailyChange]] = {
    month: [] for month in range(1, 13)
}
for change in all_changes:
    changes_by_month[change.date.month].append(change)

# Mean percentage change per month. A month with no recorded changes has no
# average, so it is left out rather than dividing by zero.
avg_daily_change = {
    month: sum(change.pct_change for change in changes) / len(changes)
    for month, changes in changes_by_month.items()
    if changes
}

# October is the best month
assert avg_daily_change[10] == max(avg_daily_change.values())

from scratch.linear_algebra import distance

# Pairwise distances between three two-element points.
a_to_b = distance([63, 150], [67, 160])        # 10.77
a_to_c = distance([63, 150], [70, 171])        # 22.14
b_to_c = distance([67, 160], [70, 171])        # 11.40

# The same three comparisons with different first coordinates; these
# assignments overwrite the values computed just above.
a_to_b = distance([160, 150], [170.2, 160])    # 14.28
a_to_c = distance([160, 150], [177.8, 171])    # 27.53
b_to_c = distance([170.2, 160], [177.8, 171])  # 13.37

from typing import Tuple

from scratch.linear_algebra import vector_mean
from scratch.statistics import standard_deviation

def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """returns the means and standard deviations for each position"""
def random_distances(dim: int, num_pairs: int) -> List[float]:
    """
    Return num_pairs distances, each measured between two fresh points
    obtained from random_point(dim).
    """
    distances = []
    for _ in range(num_pairs):
        # Draw the two endpoints in order, then measure between them.
        first = random_point(dim)
        second = random_point(dim)
        distances.append(distance(first, second))
    return distances
from typing import Dict

# Bucket every daily change under its calendar month (1 through 12).
# The container is a dict keyed by month number, so it is annotated as one.
changes_by_month: Dict[int, List[DailyChange]] = {
    month: [] for month in range(1, 13)
}
for change in all_changes:
    changes_by_month[change.date.month].append(change)

# Mean percentage change per month. A month with no recorded changes has no
# average, so it is left out rather than dividing by zero.
avg_daily_change = {
    month: sum(change.pct_change for change in changes) / len(changes)
    for month, changes in changes_by_month.items()
    if changes
}

# October is the best month
assert avg_daily_change[10] == max(avg_daily_change.values())

from scratch.linear_algebra import distance

# Pairwise distances between three two-element points.
a_to_b = distance([63, 150], [67, 160])        # 10.77
a_to_c = distance([63, 150], [70, 171])        # 22.14
b_to_c = distance([67, 160], [70, 171])        # 11.40

# The same three comparisons with different first coordinates; these
# assignments overwrite the values computed just above.
a_to_b = distance([160, 150], [170.2, 160])    # 14.28
a_to_c = distance([160, 150], [177.8, 171])    # 27.53
b_to_c = distance([170.2, 160], [177.8, 171])  # 13.37

from typing import Tuple

from scratch.linear_algebra import vector_mean
from scratch.statistics import standard_deviation

def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """returns the means and standard deviations for each position"""
    dim = len(data[0])