def cluster_distance(cluster1: Cluster,
                     cluster2: Cluster,
                     distance_agg: Callable = min) -> float:
    """Compute all the pairwise distances between cluster1 and cluster2
    and apply the aggregation function _distance_agg_ to the resulting list"""
    return distance_agg([distance(v1, v2)
                         for v1 in get_values(cluster1)
                         for v2 in get_values(cluster2)])
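# cluster_distance relies on a Cluster type and a get_values helper that are
# not shown above. A minimal sketch of one plausible layout (a Leaf wrapping
# a single point, a Merged node wrapping child clusters); the names and
# shapes here are assumptions, not part of the original snippet.
from typing import List, NamedTuple, Union

Vector = List[float]        # assumed to match the repo's Vector alias

class Leaf(NamedTuple):
    value: Vector           # a single data point

class Merged(NamedTuple):
    children: tuple         # the clusters that were merged together
    order: int              # merge order

Cluster = Union[Leaf, Merged]

def get_values(cluster: Cluster) -> List[Vector]:
    """Return every point contained (recursively) in the cluster."""
    if isinstance(cluster, Leaf):
        return [cluster.value]
    return [value
            for child in cluster.children
            for value in get_values(child)]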
from typing import List, NamedTuple

class LabeledPoint(NamedTuple):
    point: Vector
    label: str

def knn_classify(k: int,
                 labeled_points: List[LabeledPoint],
                 new_point: Vector) -> str:
    # Order the labeled points from nearest to farthest
    by_distance = sorted(labeled_points,
                         key=lambda lp: distance(lp.point, new_point))

    # Find the labels for the k closest
    k_nearest_labels = [lp.label for lp in by_distance[:k]]

    # and let them vote
    return majority_vote(k_nearest_labels)
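# knn_classify defers to a majority_vote helper that is not shown in this
# snippet. A minimal sketch, assuming ties are broken by dropping the
# farthest of the k neighbors and re-voting:
from collections import Counter
from typing import List

def majority_vote(labels: List[str]) -> str:
    """Assumes that labels are ordered from nearest to farthest."""
    vote_counts = Counter(labels)
    winner, winner_count = vote_counts.most_common(1)[0]
    num_winners = len([count
                       for count in vote_counts.values()
                       if count == winner_count])
    if num_winners == 1:
        return winner                       # unique winner, so return it
    else:
        return majority_vote(labels[:-1])   # try again without the farthest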
def find_eigenvector(A, tolerance=0.00001):
    guess = [random.random() for _ in A]

    while True:
        result = matrix_operate(A, guess)
        length = magnitude(result)
        next_guess = scalar_multiply(1 / length, result)

        if distance(guess, next_guess) < tolerance:
            return next_guess, length    # eigenvector, eigenvalue

        guess = next_guess
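# find_eigenvector leans on matrix_operate, magnitude, and scalar_multiply
# from the linear algebra code, which are not shown here. Minimal sketches,
# assuming vectors are lists of floats and matrices are lists of rows:
import math

def scalar_multiply(c, v):
    """Multiply every element of v by the scalar c."""
    return [c * v_i for v_i in v]

def dot(v, w):
    """Sum of componentwise products of v and w."""
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

def magnitude(v):
    """Euclidean length of v."""
    return math.sqrt(dot(v, v))

def matrix_operate(A, v):
    """Apply the matrix A to the vector v (one dot product per row)."""
    return [dot(row, v) for row in A]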
def knn_classify(k, labeled_points, new_point):
    """each labeled point should be a pair (point, label)"""

    # order the labeled points from nearest to farthest
    by_distance = sorted(labeled_points,
                         key=lambda point_label: distance(point_label[0],
                                                          new_point))

    # find the labels for the k closest
    k_nearest_labels = [label for _, label in by_distance[:k]]

    # and let them vote
    return majority_vote(k_nearest_labels)
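# A tiny usage example for the pair-based knn_classify above; the points and
# labels are made up purely for illustration.
examples = [([1.0, 1.0], "red"), ([1.0, 2.0], "red"), ([5.0, 5.0], "blue")]
assert knn_classify(3, examples, [1.0, 1.5]) == "red"   # two reds outvote one blue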
def find_eigenvector(m: Matrix,
                     tolerance: float = 0.00001) -> Tuple[Vector, float]:
    guess = [random.random() for _ in m]

    while True:
        result = matrix_times_vector(m, guess)      # transform the guess
        norm = magnitude(result)                    # compute the norm
        next_guess = [x / norm for x in result]     # rescale

        if distance(guess, next_guess) < tolerance:
            # convergence, so return (eigenvector, eigenvalue)
            return next_guess, norm

        guess = next_guess

# Adjacency-matrix entry: 1 if users i and j are friends, else 0.
# Assumes a friend_pairs collection of (i, j) index pairs defined elsewhere;
# the wrapping function name entry_fn is hypothetical, only the return
# expression is original.
def entry_fn(i: int, j: int):
    return 1 if (i, j) in friend_pairs or (j, i) in friend_pairs else 0
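# A small sanity check for find_eigenvector on a symmetric matrix whose
# eigenvalues are known (3 and 1), assuming matrix_times_vector, magnitude,
# and distance come from the surrounding linear algebra code. Power iteration
# from a random positive starting guess should report a value near 3.
import random

random.seed(0)
eigenvector, eigenvalue = find_eigenvector([[2.0, 1.0],
                                            [1.0, 2.0]])
assert abs(eigenvalue - 3.0) < 0.01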
import random
import sys
sys.path.insert(0, "../linear_algebra")
from vector_operations import distance

def step(v, direction, step_size):
    """move step_size in the direction from v"""
    return [v_i + step_size * direction_i
            for v_i, direction_i in zip(v, direction)]

def sum_of_squares_gradient(v):
    return [2 * v_i for v_i in v]

# pick a random starting point
v = [random.randint(-10, 10) for i in range(3)]

tolerance = 0.0000001

while True:
    gradient = sum_of_squares_gradient(v)   # compute the gradient at v
    next_v = step(v, gradient, -0.01)       # take a negative gradient step
    if distance(next_v, v) < tolerance:     # stop if we're converging
        break
    v = next_v

print(v)
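# The next snippet calls a gradient_step helper that is not defined in this
# file; a minimal sketch, assuming a vector is just a list of floats:
def gradient_step(v, gradient, step_size):
    """Move step_size in the gradient direction from v."""
    assert len(v) == len(gradient)
    return [v_i + step_size * g_i for v_i, g_i in zip(v, gradient)]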
def sum_of_squares_gradient(v: Vector) -> Vector:
    return [2 * v_i for v_i in v]

# pick a random starting point
v = [random.uniform(-10, 10) for i in range(3)]

for epoch in range(1000):
    # compute the gradient at v
    grad = sum_of_squares_gradient(v)
    # take a step in the negative gradient direction
    v = gradient_step(v, grad, step_size=-0.01)
    print(epoch, v)

print("Distance = ", distance(v, [0, 0, 0]))
assert distance(v, [0, 0, 0]) < 0.001    # v should be close to zero

"""Using the gradient descent algorithm to find the slope and intercept
of a linear equation"""

# Create a linear equation with known parameters (slope = 20, intercept = 5)
inputs = [(x, 20 * x + 5) for x in range(-50, 50)]

def linear_gradient(x: float, y: float, theta: Vector) -> Vector:
    slope, intercept = theta
    predicted = slope * x + intercept    # prediction of a linear model
    error = (predicted - y)
    squared_error = error ** 2           # minimize squared error
    grad = [2 * error * x, 2 * error]    # using its gradient
    return grad
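# A hedged sketch of how linear_gradient might be used with the inputs above
# to recover slope = 20 and intercept = 5: average the per-example gradients
# over the whole dataset and take repeated gradient steps. The vector_mean
# helper defined here is an assumption standing in for the repo's version.
import random

def vector_mean(vectors):
    """Componentwise mean of a list of equal-length vectors."""
    n = len(vectors)
    return [sum(v[i] for v in vectors) / n for i in range(len(vectors[0]))]

theta = [random.uniform(-1, 1), random.uniform(-1, 1)]   # random (slope, intercept)
learning_rate = 0.001

for epoch in range(5000):
    # average the gradient over the whole dataset, then take one step
    grad = vector_mean([linear_gradient(x, y, theta) for x, y in inputs])
    theta = gradient_step(theta, grad, -learning_rate)

slope, intercept = theta
assert 19.9 < slope < 20.1, "slope should be about 20"
assert 4.9 < intercept < 5.1, "intercept should be about 5"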
def random_distance(dim: int, num_pairs: int) -> List[float]:
    return [distance(random_point(dim), random_point(dim))
            for _ in range(num_pairs)]
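# random_distance assumes a random_point helper that is not shown; a minimal
# sketch, plus a hedged example of the usual curse-of-dimensionality
# experiment: as the dimension grows, the minimum pairwise distance creeps
# up toward the average distance.
import random
from typing import List

def random_point(dim: int) -> List[float]:
    return [random.random() for _ in range(dim)]

avg_distances = []
min_distances = []

random.seed(0)
for dim in range(1, 101):
    distances = random_distance(dim, 10000)        # 10,000 random pairs
    avg_distances.append(sum(distances) / 10000)   # track the average
    min_distances.append(min(distances))           # track the minimum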