def cluster_distance(cluster1, cluster2, distance_agg=min):
    """Aggregate the pairwise distances between two clusters.

    Every value obtained from ``get_values(cluster1)`` is paired with every
    value obtained from ``get_values(cluster2)``; ``distance`` is evaluated
    for each pair and the resulting list is handed to ``distance_agg``
    (``min`` by default), whose result is returned.
    """
    pairwise = []
    for member1 in get_values(cluster1):
        # get_values(cluster2) is re-evaluated for every member of cluster1.
        for member2 in get_values(cluster2):
            pairwise.append(distance(member1, member2))
    return distance_agg(pairwise)
def print_distances(data_matrix):
    """Print the distance between every pair of distinct rows, once per pair.

    ``shape(data_matrix)`` must yield exactly two values; only the first
    (the row count) is used. For each pair ``i < j`` the line
    ``i to j <distance>`` is printed after a ``Distances:`` header.
    """
    row_count, _ = shape(data_matrix)
    print("Distances:")
    for first in range(row_count):
        # Start at first + 1 so each unordered pair is reported exactly once.
        for second in range(first + 1, row_count):
            gap = distance(data_matrix[first], data_matrix[second])
            print(first, "to", second, gap)
def knn_classify2(k, cities, new_point):
    """Vote among the labels of the k cities closest to ``new_point``.

    Each city must expose ``longitude`` and ``latitude`` attributes and
    unpack into exactly three items, the last of which is used as its label.
    Returns whatever ``majority_vote`` returns for the collected labels.
    """
    def gap_to_new_point(city):
        return distance((city.longitude, city.latitude), new_point)

    # Nearest cities first.
    ranked = list(cities)
    ranked.sort(key=gap_to_new_point)

    votes = []
    for _, _, tag in ranked[:k]:
        votes.append(tag)
    return majority_vote(votes)
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` by a vote among its k closest labeled points.

    ``labeled_points`` is an iterable of ``(point, label)`` pairs. The pairs
    are ordered by ``distance(point, new_point)``, the labels of the first
    ``k`` are collected, and ``majority_vote`` picks the result.
    """
    def gap_to_new_point(pair):
        return distance(pair[0], new_point)

    ranked = list(labeled_points)
    ranked.sort(key=gap_to_new_point)
    votes = [tag for _, tag in ranked[:k]]
    return majority_vote(votes)
def knn_classify(k, labeled_points, new_point):
    """Return the majority label among the k points nearest ``new_point``.

    Every element of ``labeled_points`` must be a ``(point, label)`` pair.
    """
    # Rank all labeled points, closest to new_point first.
    nearest_first = sorted(
        labeled_points,
        key=lambda entry: distance(entry[0], new_point),
    )

    # Keep only the k best-ranked entries and pull out their labels.
    closest = nearest_first[:k]
    ballots = [label for _point, label in closest]

    # The labels decide the answer by vote.
    return majority_vote(ballots)
def knn_classify(k: int, lableled_ponts: List[LabeledPoint], new_point: Vector) -> str:
    """Classify ``new_point`` from the labels of its k nearest labeled points.

    Each labeled point must expose ``point`` and ``label`` attributes. The
    points are ranked by ``distance(lp.point, new_point)`` and the labels of
    the first ``k`` are passed on for a vote.
    """
    # Rank a copy of the input, nearest first.
    ranked = list(lableled_ponts)
    ranked.sort(key=lambda candidate: distance(candidate.point, new_point))

    # Labels of the k closest candidates.
    votes = [candidate.label for candidate in ranked[:k]]

    # NOTE(review): `marjority_vote` looks like a misspelling of
    # `majority_vote`; kept as written - confirm which name the file defines.
    return marjority_vote(votes)
def run_experiment():
    """Run fixed-step descent on a random 3-element start and print the result.

    Starting from three random integers in [-10, 10], repeatedly moves by
    ``-0.01`` times ``sum_of_squares_gradient`` until two successive points
    are closer than the tolerance, then prints the last accepted point.
    """
    # NOTE(review): assumes the final print sits after the loop, not inside
    # it - confirm against the original layout.
    current = [random.randint(-10, 10) for _ in range(3)]
    threshold = 0.0000001

    converged = False
    while not converged:
        candidate = step(current, sum_of_squares_gradient(current), -0.01)
        if distance(candidate, current) < threshold:
            converged = True
        else:
            current = candidate
    print(current)
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` by a vote among its k nearest labeled points.

    Args:
        k: number of nearest neighbours whose labels take part in the vote.
        labeled_points: iterable of ``(point, label)`` pairs.
        new_point: the point to classify; passed to ``distance`` together
            with each labeled point's ``point``.

    Returns:
        Whatever ``majority_vote`` returns for the k nearest labels.
    """
    # A named key function replaces ``lambda (point, _): ...``: tuple
    # parameters are a SyntaxError on Python 3, while this form runs on
    # both Python 2 and 3 and still requires each item to be a pair.
    def distance_to_new_point(labeled_point):
        point, _ = labeled_point
        return distance(point, new_point)

    # order the labeled points from nearest to farthest
    by_distance = sorted(labeled_points, key=distance_to_new_point)

    # find the labels for the k closest
    k_nearest_labels = [label for _, label in by_distance[:k]]

    # and let them vote
    return majority_vote(k_nearest_labels)
def find_eigenvector(A, tolerance=0.00001):
    """Iterate ``matrix_operate`` + rescaling until the guess stops moving.

    Starts from a vector of ones (one entry per element of ``A``). Each round
    applies ``matrix_operate(A, guess)``, divides the result by its
    ``magnitude`` and compares it with the previous guess. Once the two are
    closer than ``tolerance`` the pair ``(vector, magnitude)`` is returned,
    which the original code labels as (eigenvector, eigenvalue).
    """
    current = [1 for _ in A]
    while True:
        image = matrix_operate(A, current)
        norm = magnitude(image)
        candidate = scalar_multiply(1/norm, image)

        settled = distance(current, candidate) < tolerance
        if settled:
            return candidate, norm  # eigenvector, eigenvalue
        current = candidate
def find_eigenvector(A, tolerance=0.00001):
    """Return ``(vector, magnitude)`` once repeated rescaling converges.

    The guess starts as all ones, one entry per element of ``A``. On every
    pass the guess is replaced by ``matrix_operate(A, guess)`` scaled by the
    reciprocal of its ``magnitude``. The loop ends when consecutive guesses
    differ by less than ``tolerance`` according to ``distance``. The original
    code labels the returned pair as (eigenvector, eigenvalue).
    """
    previous = [1 for _unused in A]
    while True:
        product = matrix_operate(A, previous)
        scale = magnitude(product)
        normalized = scalar_multiply(1 / scale, product)

        if not distance(previous, normalized) < tolerance:
            # Not converged yet: carry the new guess into the next pass.
            previous = normalized
            continue

        return normalized, scale  # eigenvector, eigenvalue
def knn_classify(k, labeled_points, new_point):
    """Vote among the labels of the k labeled points nearest ``new_point``.

    Each labeled data point is a ``(point, label)`` pair.
    """
    # The key receives the whole pair and indexes it, rather than unpacking
    # the pair in the lambda's parameter list.
    def gap_to_new_point(point_label):
        return distance(point_label[0], new_point)

    # Arrange the labeled points from nearest to farthest.
    nearest_first = list(labeled_points)
    nearest_first.sort(key=gap_to_new_point)

    # Take the labels of the k nearest ones.
    chosen_labels = []
    for _, label in nearest_first[:k]:
        chosen_labels.append(label)

    # Decide by majority vote.
    return majority_vote(chosen_labels)
def knn_classify(k, labeled_points, new_point):
    """Pick a label for ``new_point`` from its k nearest labeled points.

    Each labeled point should be a ``(point, label)`` pair.
    """
    # Measure every existing point against the new point and sort the pairs
    # by that distance; the result is a new list, nearest first.
    ordered = sorted(labeled_points,
                     key=lambda pair: distance(pair[0], new_point))

    # Collect the labels of the k closest points into a list.
    top_k = ordered[:k]
    labels_in_range = [label for _, label in top_k]

    # The k neighbours may not agree on a label, so let them vote.
    return majority_vote(labels_in_range)
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` by a vote among its k nearest labeled points.

    Args:
        k: number of nearest neighbours whose labels are counted.
        labeled_points: iterable of ``(point, label)`` pairs.
        new_point: the point to classify.

    Returns:
        The result of ``majority_vote`` over the k nearest labels.
    """
    # ``lambda (point, _): ...`` unpacks a tuple in the parameter list, which
    # Python 3 rejects as a SyntaxError. Unpacking inside a named key
    # function behaves the same and is valid on Python 2 and 3.
    def distance_to_new_point(labeled_point):
        point, _ = labeled_point
        return distance(point, new_point)

    # order the labeled points from nearest to farthest
    by_distance = sorted(labeled_points, key=distance_to_new_point)

    # find the labels for the k closest
    k_nearest_labels = [label for _, label in by_distance[:k]]

    # and let them vote
    return majority_vote(k_nearest_labels)
def find_eigenvector(A, tolerance=0.00001):
    """Repeat ``matrix_operate`` and rescaling until the guess converges.

    Returns ``(vector, magnitude)``; the original code labels these as
    (eigenvector, eigenvalue).
    """
    guess = [1 for _ in A]
    converged = False
    while not converged:
        # Compute the result vector.
        transformed = matrix_operate(A, guess)
        # Magnitude of that vector.
        norm = magnitude(transformed)
        # Next vector: the scalar (1/norm) multiplied by the result vector.
        following = scalar_multiply(1/norm, transformed)
        # Stop once the two vectors are closer than the threshold.
        converged = distance(guess, following) < tolerance
        if not converged:
            guess = following
    # Return the updated vector and the magnitude.
    return following, norm  # eigenvector, eigenvalue
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` from the labels of its k nearest neighbours.

    Parameters:
        k: how many of the nearest labeled points take part in the vote.
        labeled_points: the data used for classification; each labeled
            point should be a pair ``(point, label)``.
        new_point: the data point to classify.

    Steps:
        1. Sort the labeled points by their distance to ``new_point``.
        2. Hand the labels of the first ``k`` sorted points to
           ``majority_vote`` and return its result.
    """
    def gap_to_new_point(point_label):
        return distance(point_label[0], new_point)

    # Nearest labeled points come first.
    ranked = list(labeled_points)
    ranked.sort(key=gap_to_new_point)

    # Labels of the k closest points.
    ballots = [label for _, label in ranked[:k]]

    # The vote decides.
    return majority_vote(ballots)
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` by a vote among its k closest labeled points.

    Each labeled point should be a pair ``(point, label)``. Per the original
    notes, in this file's use case:

    - ``labeled_points`` looks like ``[([longitude, latitude], label), ...]``
      and ``new_point`` like ``[longitude, latitude]``.
    - The points are sorted by distance to ``new_point`` in ascending order,
      so the closest points sit at the beginning.
    - ``distance`` is presumably the Euclidean distance - confirm against
      its definition.
    """
    def gap_to_new_point(labeled_point):
        return distance(labeled_point[0], new_point)

    # Closest points first.
    closest_first = list(labeled_points)
    closest_first.sort(key=gap_to_new_point)

    # Each entry is a (point, label) pair; only the label of each of the
    # first k entries is kept.
    votes = []
    for _, label in closest_first[:k]:
        votes.append(label)

    # Let the collected labels vote.
    return majority_vote(votes)
def random_distances(dim, num_pairs):
    """Return ``num_pairs`` distances between freshly drawn point pairs.

    For every pair, ``random_point(dim)`` is called twice and the two
    results are passed to ``distance``.
    """
    samples = []
    for _ in range(num_pairs):
        samples.append(distance(random_point(dim), random_point(dim)))
    return samples

# Disabled driver script, kept from the original file (Python 2 syntax):
#
# if __name__ == "__main__":
#
#     # try several different values for k
#     for k in [1, 3, 5, 7]:
#         num_correct = 0
#
#         for location, actual_language in cities:
#
#             other_cities = [other_city
#                             for other_city in cities
#                             if other_city != (location, actual_language)]
#
#             predicted_language = knn_classify(k, other_cities, location)
#
#             if predicted_language == actual_language:
#                 num_correct += 1
#
#         print k, "neighbor[s]:", num_correct, "correct out of", len(cities)
#
#     dimensions = range(1, 101, 5)
#
#     avg_distances = []
#     min_distances = []
#
#     random.seed(0)
#     for dim in dimensions:
#         distances = random_distances(dim, 10000)  # 10,000 random pairs
#         avg_distances.append(mean(distances))     # track the average
#         min_distances.append(min(distances))      # track the minimum
#         print dim, min(distances), mean(distances), min(distances) / mean(distances)
from linear_algebra import distance from typing import Tuple, List from linear_algebra import vector_mean from statistics import standard_deviation # we are trying to create clusters Vector = List[float] a_to_b = distance([63, 150], [67, 160]) a_to_c = distance([63, 150], [70, 171]) b_to_c = distance([67, 160], [70, 171]) def scale(data: List[Vector]) -> Tuple[Vector, Vector]: dim = len(data[0]) means = vector_mean(data) stdevs = [standard_deviation([vector[i] for vector in data]) for i in range(dim)] return means, stdevs vectors = [[ - 3 , - 1 , 1 ], [ - 1 , 0 , 1 ], [ 1 , 1 , 1 ]] means, stdevs = scale(vectors) assert means == [- 1, 0 , 1] assert stdevs == [2, 1, 0] def rescale(data: List[Vector]) -> List[Vector]: dim = len(data[0]) means, stdevs = scale(data)
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` by a vote among its k nearest labeled points.

    Args:
        k: number of nearest neighbours whose labels are counted.
        labeled_points: iterable of ``(point, label)`` pairs.
        new_point: the point to classify.

    Returns:
        The result of ``majority_vote`` over the k nearest labels.
    """
    # Tuple parameters (``lambda (point, _): ...``) are a SyntaxError on
    # Python 3; unpacking inside a named key function works on 2 and 3.
    def distance_to_new_point(labeled_point):
        point, _ = labeled_point
        return distance(point, new_point)

    # Nearest labeled points first.
    by_distance = sorted(labeled_points, key=distance_to_new_point)
    k_nearest_labels = [label for _, label in by_distance[:k]]
    return majority_vote(k_nearest_labels)
def random_distances(dim, num_pairs):
    """Build a list of ``num_pairs`` distances between random point pairs.

    Both points of a pair come from ``random_point(dim)``; their separation
    is measured with ``distance``.
    """
    results = []
    for _ in range(num_pairs):
        first = random_point(dim)
        second = random_point(dim)
        results.append(distance(first, second))
    return results
y, theta_0, alpha_0) if __name__ == "__main__": print("using the gradient") v = [random.randint(-10, 10) for i in range(3)] tolerance = 0.0000001 while True: #print v, sum_of_squares(v) gradient = sum_of_squares_gradient(v) # compute the gradient at v next_v = step(v, gradient, -0.01) # take a negative gradient step if distance(next_v, v) < tolerance: # stop if we're converging break v = next_v # continue if we're not print(v) print("minimum v", v) print("minimum value", sum_of_squares(v)) print() print("using minimize_batch") v = [random.randint(-10, 10) for i in range(3)] v = minimize_batch(sum_of_squares, sum_of_squares_gradient, v) print("minimum v", v)
def cluster_distance(cluster1, cluster2, distance_agg=min):
    """Combine all cross-cluster distances into a single value.

    ``distance`` is computed between each value of ``get_values(cluster1)``
    and each value of ``get_values(cluster2)``; the list of results is
    reduced with ``distance_agg`` (default ``min``).
    """
    all_gaps = []
    for left in get_values(cluster1):
        # One batch of distances per element of the first cluster.
        all_gaps.extend(distance(left, right)
                        for right in get_values(cluster2))
    return distance_agg(all_gaps)
def knn_classify(k, labeled_points, new_point):
    """Return the voted label of the k labeled points nearest ``new_point``.

    ``labeled_points`` holds ``(point, label)`` pairs; they are ordered by
    ``distance(point, new_point)`` before the first ``k`` labels are voted on.
    """
    def distance_sort(city):
        # Sort key: how far this entry's point lies from new_point.
        return distance(city[0], new_point)

    ranked = list(labeled_points)
    ranked.sort(key=distance_sort)

    nearest_labels = []
    for _, label in ranked[:k]:
        nearest_labels.append(label)
    return majority_vote(nearest_labels)
negate_all(gradient_fn), x, y, theta_0, alpha_0) if __name__ == "__main__": print("using the gradient") v = [random.randint(-10,10) for i in range(3)] tolerance = 0.0000001 while True: #print v, sum_of_squares(v) gradient = sum_of_squares_gradient(v) # compute the gradient at v next_v = step(v, gradient, -0.01) # take a negative gradient step if distance(next_v, v) < tolerance: # stop if we're converging break v = next_v # continue if we're not print("minimum v", v) print("minimum value", sum_of_squares(v)) print() print("using minimize_batch") v = [random.randint(-10,10) for i in range(3)] v = minimize_batch(sum_of_squares, sum_of_squares_gradient, v) print("minimum v", v)
return add(v, step) def sum_of_squares_gradient(v: Vector) -> Vector: return [2 * v_i for v_i in v] # pick a random starting point v = [random.uniform(-10, 10) for i in range(3)] for epoch in range(1000): grad = sum_of_squares_gradient(v) v = gradient_step(v, grad, -0.01) print(epoch, v) assert distance(v, [0, 0, 0]) < 0.001 # x ranges from -50 t0 49, y is always 20 * x + 5 inputs = [(x, 20 * x + 5) for x in range(-50, 50)] def linear_gradient(x: float, y: float, theta: Vector) -> Vector: slope, intercept = theta predicted = slope * x + intercept error = (predicted - y) # squared_error = error ** 2 grad = [2 * error * x, 2 * error] return grad # start with random values for slope and intercept
def maximize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0=0.01):
    """Maximize ``target_fn`` by minimizing its negation.

    Wraps ``target_fn`` with ``negate`` and ``gradient_fn`` with
    ``negate_all`` and forwards everything to ``minimize_stochastic``.

    Args:
        target_fn: function to maximize.
        gradient_fn: gradient of ``target_fn``.
        x, y: data forwarded unchanged to ``minimize_stochastic``.
        theta_0: starting parameters, forwarded unchanged.
        alpha_0: initial step size, forwarded unchanged (default 0.01).

    Returns:
        Whatever ``minimize_stochastic`` returns.
    """
    # The original call lacked the comma between ``negate_all(gradient_fn)``
    # and ``x``, which made the whole module a SyntaxError.
    return minimize_stochastic(negate(target_fn),
                               negate_all(gradient_fn),
                               x, y, theta_0, alpha_0)


if __name__ == "__main__":

    # pick a random starting point
    v = [random.randint(-10, 10) for i in range(3)]

    tolerance = 0.0000001

    while True:
        gradient = sum_of_squares_gradient(v)   # compute the gradient at v
        next_v = step(v, gradient, -0.01)       # take a negative gradient step
        if distance(next_v, v) < tolerance:     # successive points are close enough
            break
        v = next_v

    print("minimum v", v)
    print("minimum value", sum_of_squares(v))
    print()

    print("using minimize_batch")

    v = [random.randint(-10,10) for i in range(3)]

    v = minimize_batch(sum_of_squares, sum_of_squares_gradient, v)

    print("minimum v", v)
    print("minimum value", sum_of_squares(v))
def cluster_distance(cluster1, cluster2, distance_agg=min):
    """Reduce the pairwise distances between two clusters to one value.

    All distances between a value of ``get_values(cluster1)`` and a value of
    ``get_values(cluster2)`` are gathered into a list, and ``distance_agg``
    (``min`` unless overridden) is applied to that list.
    """
    def pair_distances():
        for item1 in get_values(cluster1):
            for item2 in get_values(cluster2):
                yield distance(item1, item2)

    # distance_agg receives a concrete list, not a lazy iterator.
    return distance_agg(list(pair_distances()))
return minimize_stochastic(negate(target_fn), negate_all(gradient_fn), x, y, theta_0, alpha_0) if __name__ == '__main__': print('using the gradient') v = [random.randint(-10, 10) for i in range(3)] tolerance = 0.0000001 while True: gradient = sum_of_squares_gradient(v) # computa o gradiente em v next_v = step(v, gradient, -0.01) # pega um passo gradiente negativo if distance(next_v, v) < tolerance: # para se estivermos convergindo break v = next_v # continua se não estivermos print(f'Minimum v: {v}') print(f'Minimum value: {sum_of_squares(v)}') print('------------------------------') print('Using minimize_batch') v = [random.randint(-10, 10) for i in range(3)] v = minimize_batch(sum_of_squares, sum_of_squares_gradient, v) print(f'minimum v: {v}') print(f'minimum value: {sum_of_squares(v)}')
def knn_classify(k, labeled_points, new_points):
    """Classify ``new_points`` by a vote among its k nearest labeled points.

    Args:
        k: number of nearest neighbours whose labels are counted.
        labeled_points: iterable of ``(point, label)`` pairs.
        new_points: the point to classify; despite the plural name it is
            passed to ``distance`` as a single argument.

    Returns:
        The result of ``majority_vote`` over the k nearest labels.
    """
    # ``lambda (point, _): ...`` is a SyntaxError on Python 3 because tuple
    # parameters were removed; this key function does the same unpacking and
    # runs on Python 2 and 3.
    def distance_to_target(labeled_point):
        point, _ = labeled_point
        return distance(point, new_points)

    # Nearest labeled points first.
    by_distance = sorted(labeled_points, key=distance_to_target)
    k_nearest_labels = [label for _, label in by_distance[:k]]
    return majority_vote(k_nearest_labels)
def step(v, direction, step_size): return [v_i + step_size * direction_i for v_i, direction_i in zip(v,direction)] def sum_of_squares_gradient(v): return [2 * v_i for v_i in v] # Pick a random starting point. v = [random.randint(-10,10) for i in range(3)] tolerance = 0.0000001 while True: gradient = sum_of_squares_gradient(v) next_v = step(v, gradient, -0.01) if distance(next_v,v) < tolerance: break v = next_v print v step_sizes = [100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001] def safe(f): """ return a new function that's the same as f, except that it outputs infinity whenever f produces an error""" def safe_f(*args,**kwargs): try: return f(*args,**kwargs) except: return float('inf')
def test_distance(self):
    """``distance`` between [5, 4] and [1, 1] must compare equal to 5."""
    # The coordinate differences are 4 and 3; presumably distance() is
    # Euclidean, which gives 5 - confirm against its definition.
    expected = 5
    actual = distance([5, 4], [1, 1])
    self.assertEqual(expected, actual)
def cluster_distance(cluster1, cluster2, distance_agg=min):
    """Apply ``distance_agg`` to every distance between the two clusters.

    The distances are taken between each value of ``get_values(cluster1)``
    and each value of ``get_values(cluster2)``.
    """
    cross_distances = [
        distance(a, b)
        for a in get_values(cluster1)
        for b in get_values(cluster2)
    ]
    aggregated = distance_agg(cross_distances)
    return aggregated
# Demo of the vector helpers in `la`; A and B are defined earlier in the file.

# A scaled by 10.
C = la.scalar_multiply(10, A)
print("10 * A = ", C)

# Mean of the vectors A and B.
C = la.vector_mean([A, B])
print("A and B mean = ", C)

# Dot product of A and B.
C = la.dot(A, B)
print("A dot B = ", C)

# Sum of squares of A.
C = la.sum_of_squares(A)
print("A^2's summary = ", C)

# Magnitude of A.
C = la.magnitude(A)
print("A's magnitude = ", C)

# Distance between A and B (the printed label mentions only A).
C = la.distance(A, B)
print("A's distance = ", C)

print()
print("*** matrix ......")

# Demo of the matrix helpers on a 3x3 list of lists.
M = [[1, 2, 3],
     [5, 6, 7],
     [3, 6, 9]]
print("M = ", M)

shape = la.shape(M)
print("M's shape = ", shape)

# Row at index 1.
row_1 = la.get_row(M, 1)
print("M[1,:] = ", row_1)

# Column at index 1.
# NOTE(review): the label "M[:1]" probably means "M[:,1]" - confirm.
col_1 = la.get_column(M, 1)
print("M[:1] = ", col_1)
plt.plot(xs, estimates, 'b+', label='Estimate') # blue + plt.legend(loc=9) plt.show() plt.close() # pick a random starting point v = [random.uniform(-10, 10) for i in range(3)] print(v) for epoch in range(1000): grad = sum_of_squares_gradient(v) # compute the gradient at v v = gradient_step(v, grad, -0.01) # take a fixed negative gradient step print(epoch, v) assert distance(v, [0, 0, 0]) < 0.001 # v should be close to 0 print("") print("Using gradient descent to fit models") # x ranges from -50 to 49, y is always 20 * x + 5 inputs = [(x, 20 * x + 5) for x in range(-50, 50)] # Start with random values for slope and intercept. theta = [random.uniform(-1, 1), random.uniform(-1, 1)] learning_rate = 0.001 for epoch in range(5000): # Compute the mean of the gradients grad = vector_mean([linear_gradient(x, y, theta) for x, y in inputs]) # Take a step in that direction