def scale(data_matrix):
    """Compute per-column summary statistics for data_matrix.

    Returns a pair (means, stdevs): two lists holding, for each column in
    order, the mean and the standard deviation of that column's values.
    """
    # shape() yields (rows, columns); only the column count is needed here
    _, column_count = shape(data_matrix)
    column_indices = range(column_count)

    means = [mean(get_column(data_matrix, index))
             for index in column_indices]
    stdevs = [standard_deviation(get_column(data_matrix, index))
              for index in column_indices]

    return means, stdevs
def least_squares_fit(x, y):
    """Fit the line y = alpha + beta * x to the training values x and y
    by least squares.

    Returns the pair (alpha, beta): the intercept and the slope.
    """
    # slope: correlation rescaled by the ratio of the two spreads
    # NOTE(review): nothing guards against standard_deviation(x) being zero
    # (e.g. every x identical) -- confirm callers never pass such data
    r = correlation(x, y)
    spread_y = standard_deviation(y)
    spread_x = standard_deviation(x)
    beta = r * spread_y / spread_x

    # intercept: chosen so the line passes through (mean(x), mean(y))
    center_y = mean(y)
    alpha = center_y - beta * mean(x)

    return alpha, beta
# plot_cities is defined elsewhere; presumably it draws the city data
plot_cities()

# Leave-one-out evaluation: try several different values for k, and for each
# k classify every city using all of the *other* cities as candidate
# neighbors, counting how often the prediction matches the real label.
for k in range(1, 76):
    num_correct = 0

    for location, actual_language in cities:
        # build the held-out record once rather than once per comparison
        this_city = (location, actual_language)
        other_cities = [other_city
                        for other_city in cities
                        if other_city != this_city]

        predicted_language = knn_classify(k, other_cities, location)

        if predicted_language == actual_language:
            num_correct += 1

    # A single parenthesized string prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("%s neighbor[s]: %s correct out of %s"
          % (k, num_correct, len(cities)))

# For a range of dimensions, sample random distances and record both the
# average and the minimum distance seen at each dimension.
dimensions = range(1, 101, 5)

avg_distances = []
min_distances = []

random.seed(0)  # fixed seed for the random module
for dim in dimensions:
    distances = random_distances(dim, 10000)  # 10,000 random pairs

    # compute each summary once; both are reused for the lists and the report
    avg_distance = mean(distances)
    min_distance = min(distances)

    avg_distances.append(avg_distance)  # track the average
    min_distances.append(min_distance)  # track the minimum

    # columns: dimension, minimum, average, minimum / average
    print("%s %s %s %s"
          % (dim, min_distance, avg_distance, min_distance / avg_distance))