def squared_clustering_errors(inputs: List[Vector], k: int) -> float:
    """Return the total squared error from k-means clustering the inputs.

    A fresh ``KMeans(k)`` is trained on ``inputs``. Every input is then
    assigned to a cluster with ``classify`` and the squared distance between
    the input and the mean of its assigned cluster is added to the total.
    """
    model = KMeans(k)
    model.train(inputs)
    centers = model.means

    # One cluster index per input, in input order.
    cluster_ids = list(map(model.classify, inputs))

    return sum(
        squared_distance(point, centers[cluster_id])
        for point, cluster_id in zip(inputs, cluster_ids)
    )
def squared_clustering_errors(inputs: List[Vector], k: int) -> float:
    """Find the total squared error from k-means clustering the inputs.

    Trains ``KMeans(k)`` on ``inputs`` and sums, over all inputs, the squared
    distance from the input to the mean of the cluster it is classified into.
    """
    model = KMeans(k)
    model.train(inputs)
    centers = model.means

    labels = [model.classify(point) for point in inputs]

    # Pair every input with the mean of its own cluster.
    nearest_centers = (centers[label] for label in labels)
    return sum(map(squared_distance, inputs, nearest_centers))
def squared_clustering_errors(inputs: List[Vector], k: int) -> float:
    """Compute the sum of squared errors produced by k-means clustering.

    The inputs are clustered with a newly trained ``KMeans(k)``; each input
    contributes its squared distance to the mean of the cluster that
    ``classify`` picks for it.
    """
    kmeans = KMeans(k)
    kmeans.train(inputs)
    centroids = kmeans.means

    nearest = [kmeans.classify(vector) for vector in inputs]
    errors = [
        squared_distance(vector, centroids[index])
        for vector, index in zip(inputs, nearest)
    ]
    return sum(errors)
def classify(self, input):
    """Return the index of the cluster whose mean is closest to ``input``.

    Closeness is measured with ``squared_distance`` against
    ``self.means[i]`` for every ``i`` in ``range(self.k)``; on a tie the
    lowest index wins.
    """
    candidates = range(self.k)
    centers = self.means
    return min(candidates,
               key=lambda index: squared_distance(input, centers[index]))
def main():
    """Train an XOR network and a Fizz Buzz network, then print the accuracy.

    The XOR network (2 inputs -> 2 hidden neurons -> 1 output) is trained for
    20000 epochs and asserted to reproduce the XOR truth table. The Fizz Buzz
    network (10 inputs -> NUM_HIDDEN hidden neurons -> 4 outputs) is trained
    for 500 epochs on the numbers 101..1023 and evaluated on 1..100.
    """
    import random
    random.seed(0)

    # training data
    xor_inputs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    xor_targets = [[0.], [1.], [1.], [0.]]

    # start with random weights
    hidden_layer = [
        [random.random() for _ in range(2 + 1)],  # first hidden neuron
        [random.random() for _ in range(2 + 1)],  # second hidden neuron
    ]
    output_layer = [
        [random.random() for _ in range(2 + 1)],  # the only output neuron
    ]
    network = [hidden_layer, output_layer]

    from scratch.gradient_descent import gradient_step
    import tqdm

    learning_rate = 1.0

    def apply_gradients(current, gradients):
        """Take a gradient step for every neuron in every layer."""
        return [
            [gradient_step(neuron, grad, -learning_rate)
             for neuron, grad in zip(layer, layer_grad)]
            for layer, layer_grad in zip(current, gradients)
        ]

    for _ in tqdm.trange(20000, desc="neural net for xor"):
        for x, y in zip(xor_inputs, xor_targets):
            network = apply_gradients(network,
                                      sqerror_gradients(network, x, y))

    # check that the network really implements an XOR gate
    assert feed_forward(network, [0, 0])[-1][0] < 0.01
    assert feed_forward(network, [0, 1])[-1][0] > 0.99
    assert feed_forward(network, [1, 0])[-1][0] > 0.99
    assert feed_forward(network, [1, 1])[-1][0] < 0.01

    training_numbers = range(101, 1024)
    xs = [binary_encode(n) for n in training_numbers]
    ys = [fizz_buzz_encode(n) for n in training_numbers]

    NUM_HIDDEN = 25

    # hidden layer: 10 inputs -> NUM_HIDDEN outputs
    hidden_layer = [[random.random() for _ in range(10 + 1)]
                    for _ in range(NUM_HIDDEN)]
    # output layer: NUM_HIDDEN inputs -> 4 outputs
    output_layer = [[random.random() for _ in range(NUM_HIDDEN + 1)]
                    for _ in range(4)]
    network = [hidden_layer, output_layer]

    from scratch.linear_algebra import squared_distance

    learning_rate = 1.0

    with tqdm.trange(500) as progress:
        for _ in progress:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = feed_forward(network, x)[-1]
                epoch_loss += squared_distance(predicted, y)
                network = apply_gradients(network,
                                          sqerror_gradients(network, x, y))

            progress.set_description(f"fizz buzz (loss: {epoch_loss:.2f})")

    num_correct = 0
    for n in range(1, 101):
        predicted = argmax(feed_forward(network, binary_encode(n))[-1])
        actual = argmax(fizz_buzz_encode(n))
        labels = [str(n), "fizz", "buzz", "fizzbuzz"]
        print(n, labels[predicted], labels[actual])

        if predicted == actual:
            num_correct += 1

    print(num_correct, "/", 100)
def classify(self, input: Vector) -> int:
    """Return the index of the cluster closest to the input.

    Distances to ``self.means[0] .. self.means[self.k - 1]`` are measured
    with ``squared_distance``; on a tie the lowest index is returned.
    """
    distances = [squared_distance(input, self.means[index])
                 for index in range(self.k)]
    # Pick the position of the smallest distance.
    return min(range(self.k), key=distances.__getitem__)
def main():
    """Run the clustering demos: k-means checks, error plot, image recoloring
    and bottom-up clustering plots.

    Figures are written to the ``im/`` directory and the image is read from
    ``girl_with_book.jpg``; the asserts act as regression checks for the
    seeded k-means runs and for the bottom-up cluster sizes.
    """
    inputs: List[List[float]] = [
        [-14, -5], [13, 13], [20, 23], [-19, -11], [-9, -16],
        [21, 27], [-49, 15], [26, 13], [-46, 5], [-34, -1],
        [11, 15], [-49, 0], [-22, -16], [19, 28], [-12, -8],
        [-13, -19], [-41, 8], [-11, -6], [-25, -9], [-18, -3],
    ]

    def sorted_means(seed: int, k: int):
        """Seed the RNG, train KMeans(k=k) on inputs, return its sorted means."""
        random.seed(seed)  # fixed seed so the results are reproducible
        model = KMeans(k=k)
        model.train(inputs)
        return sorted(model.means)  # sorted so the checks below are stable

    # Check that the means are close to what we expect.
    means = sorted_means(12, 3)
    assert len(means) == 3
    for mean, expected in zip(means, [[-44, 5], [-16, -10], [18, 20]]):
        assert squared_distance(mean, expected) < 1

    means = sorted_means(0, 2)
    assert len(means) == 2
    for mean, expected in zip(means, [[-26, -5], [18, 20]]):
        assert squared_distance(mean, expected) < 1

    from matplotlib import pyplot as plt

    def squared_clustering_errors(inputs: List[Vector], k: int) -> float:
        """Return the total squared error from k-means clustering the inputs."""
        model = KMeans(k)
        model.train(inputs)
        centers = model.means
        labels = [model.classify(point) for point in inputs]
        return sum(squared_distance(point, centers[label])
                   for point, label in zip(inputs, labels))

    # Plot the error for every cluster count from 1 up to len(inputs).
    ks = range(1, len(inputs) + 1)
    errors = [squared_clustering_errors(inputs, k) for k in ks]

    plt.plot(ks, errors)
    plt.xticks(ks)
    plt.xlabel("k")
    plt.ylabel("total squared error")
    plt.title("Total Error vs. # of Clusters")
    plt.savefig('im/total_error_vs_num_clusters')
    plt.gca().clear()

    image_path = r"girl_with_book.jpg"  # wherever your image is
    import matplotlib.image as mpimg
    img = mpimg.imread(image_path) / 256  # rescale to between 0 and 1

    # Flatten the image into a list of pixels; .tolist() converts each
    # numpy pixel to a plain Python list.
    pixels = [pixel.tolist() for row in img for pixel in row]

    clusterer = KMeans(5)
    clusterer.train(pixels)  # this might take a while

    def recolor(pixel: Vector) -> Vector:
        """Replace a pixel with the mean of its closest cluster."""
        return clusterer.means[clusterer.classify(pixel)]

    new_img = [[recolor(pixel) for pixel in row] for row in img]

    plt.close()
    plt.imshow(new_img)
    plt.axis('off')
    plt.savefig('im/recolored_girl_with_book.jpg')
    plt.gca().clear()

    def plot_clusters(clusters, title: str, path: str) -> None:
        """Scatter up to three clusters, number their means, save the figure."""
        styles = zip([1, 2, 3], clusters, ['D', 'o', '*'], ['r', 'g', 'b'])
        for number, members, marker, color in styles:
            xs, ys = zip(*members)  # split the points into x and y sequences
            plt.scatter(xs, ys, color=color, marker=marker)

            # put a number at the mean of the cluster
            mean_x, mean_y = vector_mean(members)
            plt.plot(mean_x, mean_y, marker=f'${number}$', color='black')

        plt.title(title)
        plt.xlabel("blocks east of city center")
        plt.ylabel("blocks north of city center")
        plt.savefig(path)
        plt.gca().clear()

    base_cluster = bottom_up_cluster(inputs)
    three_clusters = [get_values(cluster)
                      for cluster in generate_clusters(base_cluster, 3)]

    # sort smallest to largest
    tc = sorted(three_clusters, key=len)
    assert len(tc) == 3
    assert [len(c) for c in tc] == [2, 4, 14]
    assert sorted(tc[0]) == [[11, 15], [13, 13]]

    plt.close()
    plot_clusters(three_clusters,
                  "User Locations -- 3 Bottom-Up Clusters, Min",
                  'im/bottom_up_clusters_min.png')
    plt.close()

    base_cluster_max = bottom_up_cluster(inputs, max)
    three_clusters_max = [get_values(cluster)
                          for cluster in generate_clusters(base_cluster_max, 3)]

    plot_clusters(three_clusters_max,
                  "User Locations -- 3 Bottom-Up Clusters, Max",
                  'im/bottom_up_clusters_max.png')
def main():
    """Train a network on XOR, then one on Fizz Buzz, and print the results.

    First a 2 -> 2 -> 1 network is trained for 20000 epochs and asserted to
    have learned XOR. Then a 10 -> NUM_HIDDEN -> 4 network is trained for 500
    epochs on the encoded numbers 101..1023 and its predictions for 1..100
    are printed together with the number of correct ones.
    """
    import random
    random.seed(0)

    def random_layer(num_inputs, num_neurons):
        """Build a layer of neurons with random weights (inputs + 1 bias)."""
        return [[random.random() for _ in range(num_inputs + 1)]
                for _ in range(num_neurons)]

    # training data
    xor_inputs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    xor_targets = [[0.], [1.], [1.], [0.]]

    # start with random weights
    network = [
        random_layer(2, 2),  # hidden layer: 2 inputs -> 2 outputs
        random_layer(2, 1),  # output layer: 2 inputs -> 1 output
    ]

    from scratch.gradient_descent import gradient_step
    import tqdm

    learning_rate = 1.0

    def step(current, x, y):
        """Return the network after one gradient step on the example (x, y)."""
        gradients = sqerror_gradients(current, x, y)
        return [
            [gradient_step(neuron, grad, -learning_rate)
             for neuron, grad in zip(layer, layer_grad)]
            for layer, layer_grad in zip(current, gradients)
        ]

    for _ in tqdm.trange(20000, desc="neural net for xor"):
        for x, y in zip(xor_inputs, xor_targets):
            network = step(network, x, y)

    def xor_output(a, b):
        """Return the single output of the trained network for inputs a, b."""
        return feed_forward(network, [a, b])[-1][0]

    # check that it learned XOR
    assert xor_output(0, 0) < 0.01
    assert xor_output(0, 1) > 0.99
    assert xor_output(1, 0) > 0.99
    assert xor_output(1, 1) < 0.01

    training_numbers = range(101, 1024)
    xs = [binary_encode(n) for n in training_numbers]
    ys = [fizz_buzz_encode(n) for n in training_numbers]

    NUM_HIDDEN = 25

    network = [
        random_layer(10, NUM_HIDDEN),  # hidden layer: 10 inputs -> NUM_HIDDEN outputs
        random_layer(NUM_HIDDEN, 4),   # output layer: NUM_HIDDEN inputs -> 4 outputs
    ]

    from scratch.linear_algebra import squared_distance

    learning_rate = 1.0

    with tqdm.trange(500) as progress:
        for _ in progress:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = feed_forward(network, x)[-1]
                epoch_loss += squared_distance(predicted, y)
                network = step(network, x, y)

            progress.set_description(f"fizz buzz (loss: {epoch_loss:.2f})")

    num_correct = 0
    for n in range(1, 101):
        predicted = argmax(feed_forward(network, binary_encode(n))[-1])
        actual = argmax(fizz_buzz_encode(n))
        labels = [str(n), "fizz", "buzz", "fizzbuzz"]
        print(n, labels[predicted], labels[actual])

        if predicted == actual:
            num_correct += 1

    print(num_correct, "/", 100)
def main():
    """Exercise k-means and bottom-up clustering and save the demo figures.

    Steps, in order: two seeded k-means runs checked with asserts, a plot of
    total squared error against k, a 5-color recoloring of
    ``girl_with_book.jpg``, and two bottom-up clustering scatter plots
    (default linkage and ``max``). All figures are saved under ``im/``.
    """
    inputs: List[List[float]] = [
        [-14, -5], [13, 13], [20, 23], [-19, -11], [-9, -16],
        [21, 27], [-49, 15], [26, 13], [-46, 5], [-34, -1],
        [11, 15], [-49, 0], [-22, -16], [19, 28], [-12, -8],
        [-13, -19], [-41, 8], [-11, -6], [-25, -9], [-18, -3],
    ]

    random.seed(12)  # so the run is reproducible
    three_means_model = KMeans(k=3)
    three_means_model.train(inputs)
    means = sorted(three_means_model.means)  # sort for the checks below

    assert len(means) == 3

    # Check that the means are close to what we expect.
    expected_three = ([-44, 5], [-16, -10], [18, 20])
    for position, target in enumerate(expected_three):
        assert squared_distance(means[position], target) < 1

    random.seed(0)
    two_means_model = KMeans(k=2)
    two_means_model.train(inputs)
    means = sorted(two_means_model.means)

    assert len(means) == 2
    expected_two = ([-26, -5], [18, 20])
    for position, target in enumerate(expected_two):
        assert squared_distance(means[position], target) < 1

    from matplotlib import pyplot as plt

    def squared_clustering_errors(inputs: List[Vector], k: int) -> float:
        """Return the total squared error from k-means clustering the inputs."""
        model = KMeans(k)
        model.train(inputs)
        centers = model.means
        cluster_ids = list(map(model.classify, inputs))
        return sum(
            squared_distance(point, centers[cluster_id])
            for point, cluster_id in zip(inputs, cluster_ids)
        )

    # now plot from 1 up to len(inputs) clusters
    ks = range(1, len(inputs) + 1)
    errors = []
    for k in ks:
        errors.append(squared_clustering_errors(inputs, k))

    plt.plot(ks, errors)
    plt.xticks(ks)
    plt.xlabel("k")
    plt.ylabel("total squared error")
    plt.title("Total Error vs. # of Clusters")

    plt.savefig('im/total_error_vs_num_clusters')
    plt.gca().clear()

    image_path = r"girl_with_book.jpg"  # wherever your image is
    import matplotlib.image as mpimg
    img = mpimg.imread(image_path) / 256  # rescale to between 0 and 1

    # .tolist() converts a numpy array to a Python list
    pixels = [pixel.tolist() for row in img for pixel in row]

    clusterer = KMeans(5)
    clusterer.train(pixels)  # this might take a while

    def recolor(pixel: Vector) -> Vector:
        """Map a pixel to the mean color of its closest cluster."""
        closest = clusterer.classify(pixel)
        return clusterer.means[closest]

    # Recolor every pixel, keeping the row structure of the image.
    new_img = [[recolor(pixel) for pixel in row] for row in img]

    plt.close()

    plt.imshow(new_img)
    plt.axis('off')

    plt.savefig('im/recolored_girl_with_book.jpg')
    plt.gca().clear()

    markers = ['D', 'o', '*']
    colors = ['r', 'g', 'b']

    base_cluster = bottom_up_cluster(inputs)
    three_clusters = [get_values(cluster)
                      for cluster in generate_clusters(base_cluster, 3)]

    # sort smallest to largest
    tc = sorted(three_clusters, key=len)
    assert len(tc) == 3
    assert [len(c) for c in tc] == [2, 4, 14]
    assert sorted(tc[0]) == [[11, 15], [13, 13]]

    plt.close()

    styled = zip(three_clusters, markers, colors)
    for number, (members, marker, color) in enumerate(styled, start=1):
        xs, ys = zip(*members)  # unzip the points into x and y sequences
        plt.scatter(xs, ys, color=color, marker=marker)

        # put a number at the mean of the cluster
        mean_x, mean_y = vector_mean(members)
        plt.plot(mean_x, mean_y, marker=f'${number}$', color='black')

    plt.title("User Locations -- 3 Bottom-Up Clusters, Min")
    plt.xlabel("blocks east of city center")
    plt.ylabel("blocks north of city center")

    plt.savefig('im/bottom_up_clusters_min.png')
    plt.gca().clear()
    plt.close()

    base_cluster_max = bottom_up_cluster(inputs, max)
    three_clusters_max = [get_values(cluster)
                          for cluster in generate_clusters(base_cluster_max, 3)]

    styled = zip(three_clusters_max, markers, colors)
    for number, (members, marker, color) in enumerate(styled, start=1):
        xs, ys = zip(*members)  # unzip the points into x and y sequences
        plt.scatter(xs, ys, color=color, marker=marker)

        # put a number at the mean of the cluster
        mean_x, mean_y = vector_mean(members)
        plt.plot(mean_x, mean_y, marker=f'${number}$', color='black')

    plt.title("User Locations -- 3 Bottom-Up Clusters, Max")
    plt.xlabel("blocks east of city center")
    plt.ylabel("blocks north of city center")

    plt.savefig('im/bottom_up_clusters_max.png')
    plt.gca().clear()
def classify(self, input: Vector) -> int:
    """Return the index of the closest cluster.

    Every index in ``range(self.k)`` is scored by the squared distance
    between ``input`` and ``self.means[index]``; the lowest-scoring index is
    returned (the first one on a tie).
    """
    def distance_to_mean(index: int) -> float:
        return squared_distance(input, self.means[index])

    return min(range(self.k), key=distance_to_mean)
def main():
    """Run the clustering demos and save (then show) the resulting figures.

    Steps, in order: two seeded k-means runs checked with asserts, a plot of
    total squared error against k, a 5-color recoloring of
    ``girl_with_book.jpg``, and two bottom-up clustering scatter plots
    (default linkage and ``max``). All figures are saved under ``im/``.

    Every figure is saved BEFORE ``plt.show()`` is called: with an
    interactive backend ``show()`` blocks until the window is closed and the
    figure is then discarded, so saving afterwards writes an empty image.
    """
    inputs: List[List[float]] = [
        [-14, -5], [13, 13], [20, 23], [-19, -11], [-9, -16],
        [21, 27], [-49, 15], [26, 13], [-46, 5], [-34, -1],
        [11, 15], [-49, 0], [-22, -16], [19, 28], [-12, -8],
        [-13, -19], [-41, 8], [-11, -6], [-25, -9], [-18, -3],
    ]

    random.seed(12)  # so you get the same results as me
    clusterer = KMeans(k=3)
    clusterer.train(inputs)
    means = sorted(clusterer.means)  # sort for the unit tests

    assert len(means) == 3

    # Check that the means are what we expected.
    assert squared_distance(means[0], [-44, 5]) < 1
    assert squared_distance(means[1], [-16, -10]) < 1
    assert squared_distance(means[2], [18, 20]) < 1

    random.seed(0)
    clusterer = KMeans(k=2)
    clusterer.train(inputs)
    means = sorted(clusterer.means)

    assert len(means) == 2
    assert squared_distance(means[0], [-26, -5]) < 1
    assert squared_distance(means[1], [18, 20]) < 1

    from matplotlib import pyplot as plt

    def squared_clustering_errors(inputs: List[Vector], k: int) -> float:
        """Return the total squared error from k-means clustering the inputs."""
        clusterer = KMeans(k)
        clusterer.train(inputs)
        means = clusterer.means
        assignments = [clusterer.classify(point) for point in inputs]

        return sum(
            squared_distance(point, means[cluster])
            for point, cluster in zip(inputs, assignments))

    # Plot the error for every split from 1 cluster up to len(inputs) clusters.
    ks = range(1, len(inputs) + 1)
    errors = [squared_clustering_errors(inputs, k) for k in ks]

    plt.plot(ks, errors)
    plt.xticks(ks)
    plt.xlabel("k")
    plt.ylabel("Suma kwadratow bledow")
    plt.title("Blad calkowity a liczba grup")

    # Save first: after show() returns the figure may already be gone.
    plt.savefig('im/total_error_vs_num_clusters')
    plt.show()
    plt.gca().clear()

    image_path = r"girl_with_book.jpg"  # path of the image file
    import matplotlib.image as mpimg
    img = mpimg.imread(image_path) / 256  # rescale to values between 0 and 1

    # .tolist() converts a NumPy array to a list object
    pixels = [pixel.tolist() for row in img for pixel in row]

    clusterer = KMeans(5)
    clusterer.train(pixels)  # this operation may be time-consuming

    def recolor(pixel: Vector) -> Vector:
        """Return the mean of the cluster closest to the pixel."""
        cluster = clusterer.classify(pixel)  # index of the closest cluster
        return clusterer.means[cluster]      # mean of the closest cluster

    new_img = [
        [recolor(pixel) for pixel in row]  # recolor this row of pixels
        for row in img                     # for each row of the image
    ]

    plt.close()

    plt.imshow(new_img)
    plt.axis('off')

    # Save first: after show() returns the figure may already be gone.
    plt.savefig('im/recolored_girl_with_book.jpg')
    plt.show()
    plt.gca().clear()

    base_cluster = bottom_up_cluster(inputs)

    three_clusters = [
        get_values(cluster)
        for cluster in generate_clusters(base_cluster, 3)
    ]

    # sort from smallest to largest
    tc = sorted(three_clusters, key=len)
    assert len(tc) == 3
    assert [len(c) for c in tc] == [2, 4, 14]
    assert sorted(tc[0]) == [[11, 15], [13, 13]]

    plt.close()

    for i, cluster, marker, color in zip([1, 2, 3], three_clusters,
                                         ['D', 'o', '*'], ['r', 'g', 'b']):
        xs, ys = zip(*cluster)  # unpack the points into x and y sequences
        plt.scatter(xs, ys, color=color, marker=marker)

        # put a number at the mean of the cluster
        x, y = vector_mean(cluster)
        plt.plot(x, y, marker='$' + str(i) + '$', color='black')

    plt.title("Miejsca zamieszkania (3 grupy, metoda bottom-up, minimum)")
    plt.xlabel("Liczba przecznic na wschod od centrum miasta ")
    plt.ylabel("Liczba przecznic na polnoc od centrum miasta ")

    # Save first: after show() returns the figure may already be gone.
    plt.savefig('im/bottom_up_clusters_min.png')
    plt.show()
    plt.gca().clear()
    plt.close()

    base_cluster_max = bottom_up_cluster(inputs, max)
    three_clusters_max = [
        get_values(cluster)
        for cluster in generate_clusters(base_cluster_max, 3)
    ]

    for i, cluster, marker, color in zip([1, 2, 3], three_clusters_max,
                                         ['D', 'o', '*'], ['r', 'g', 'b']):
        xs, ys = zip(*cluster)  # unpack the points into x and y sequences
        plt.scatter(xs, ys, color=color, marker=marker)

        # put a number at the mean of the cluster
        x, y = vector_mean(cluster)
        plt.plot(x, y, marker='$' + str(i) + '$', color='black')

    plt.title("Miejsca zamieszkania (3 grupy, metoda bottom-up, maksimum)")
    plt.xlabel("Liczba przecznic na wschod od centrum miasta ")
    plt.ylabel("Liczba przecznic na polnoc od centrum miasta ")

    plt.savefig('im/bottom_up_clusters_max.png')
    plt.gca().clear()