def fizzbuzz_accuracy(low: int, hi: int, net: Layer) -> float:
    """Return the fraction of integers in ``range(low, hi)`` that `net` gets right.

    Each integer is encoded with ``binary_encode`` and passed through
    ``net.forward``; the index of the largest output is compared with the
    index of the largest entry of ``fizz_buzz_encode`` for the same integer.

    Raises:
        ZeroDivisionError: if ``hi == low`` (the count is divided by
            ``hi - low``).
    """
    hits = sum(
        1
        for number in range(low, hi)
        if argmax(net.forward(binary_encode(number)))
        == argmax(fizz_buzz_encode(number))
    )
    return hits / (hi - low)
def main():
    """Run the demos in sequence: XOR, FizzBuzz (two losses), then MNIST.

    Every section reseeds ``random`` with 0 before building its model so the
    runs are repeatable. Progress is reported through ``tqdm`` progress bars
    and a few ``print`` calls; nothing is returned.

    NOTE(review): this file contains a second, later ``def main()``; at import
    time that later definition rebinds the name and replaces this one.
    """
    from typing import Optional

    import tqdm

    def train_epoch(net: Layer,
                    xs: List[Tensor],
                    ys: List[Tensor],
                    loss: Loss,
                    optimizer: Optimizer) -> float:
        """Make one pass over the (x, y) pairs and return the summed loss.

        For every example: forward pass, accumulate the loss, backpropagate
        the loss gradient, then take one optimizer step.
        """
        epoch_loss = 0.0
        for x, y in zip(xs, ys):
            predicted = net.forward(x)
            epoch_loss += loss.loss(predicted, y)
            gradient = loss.gradient(predicted, y)
            net.backward(gradient)
            optimizer.step(net)
        return epoch_loss

    # ------------------------------------------------------------------
    # XOR revisited
    # ------------------------------------------------------------------

    # Training data: the four XOR input pairs and their targets.
    xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    ys = [[0.], [1.], [1.], [0.]]

    random.seed(0)

    net = Sequential([
        Linear(input_dim=2, output_dim=2),
        Sigmoid(),
        Linear(input_dim=2, output_dim=1)
    ])

    optimizer = GradientDescent(learning_rate=0.1)
    loss = SSE()

    with tqdm.trange(3000) as t:
        for _ in t:
            epoch_loss = train_epoch(net, xs, ys, loss, optimizer)
            t.set_description(f"xor loss {epoch_loss:.3f}")

    for param in net.params():
        print(param)

    # ------------------------------------------------------------------
    # FizzBuzz revisited
    # ------------------------------------------------------------------

    from scratch.neural_networks import binary_encode, fizz_buzz_encode, argmax

    # Train on 101..1023; 1..100 is held out as the test set below.
    xs = [binary_encode(n) for n in range(101, 1024)]
    ys = [fizz_buzz_encode(n) for n in range(101, 1024)]

    NUM_HIDDEN = 25

    random.seed(0)

    net = Sequential([
        Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
        Tanh(),
        Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform'),
        Sigmoid()
    ])

    # Defined locally so it closes over the encoders/argmax imported above.
    def fizzbuzz_accuracy(low: int, hi: int, net: Layer) -> float:
        """Fraction of n in range(low, hi) whose predicted class is correct."""
        num_correct = 0
        for n in range(low, hi):
            x = binary_encode(n)
            predicted = argmax(net.forward(x))
            actual = argmax(fizz_buzz_encode(n))
            if predicted == actual:
                num_correct += 1

        return num_correct / (hi - low)

    optimizer = Momentum(learning_rate=0.1, momentum=0.9)
    loss = SSE()

    with tqdm.trange(1000) as t:
        for _ in t:
            epoch_loss = train_epoch(net, xs, ys, loss, optimizer)
            accuracy = fizzbuzz_accuracy(101, 1024, net)
            t.set_description(f"fb loss: {epoch_loss:.2f} acc: {accuracy:.2f}")

    # Now check results on the test set
    print("test results", fizzbuzz_accuracy(1, 101, net))

    # Same architecture again, but trained with softmax cross-entropy.
    random.seed(0)

    net = Sequential([
        Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
        Tanh(),
        Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform')
        # No final sigmoid layer now
    ])

    optimizer = Momentum(learning_rate=0.1, momentum=0.9)
    loss = SoftmaxCrossEntropy()

    with tqdm.trange(100) as t:
        for _ in t:
            epoch_loss = train_epoch(net, xs, ys, loss, optimizer)
            accuracy = fizzbuzz_accuracy(101, 1024, net)
            t.set_description(f"fb loss: {epoch_loss:.3f} acc: {accuracy:.2f}")

    # Again check results on the test set
    print("test results", fizzbuzz_accuracy(1, 101, net))

    # ------------------------------------------------------------------
    # Load the MNIST data
    # ------------------------------------------------------------------

    import mnist

    # This will download the data, change this to where you want it.
    # (Yes, it's a 0-argument function, that's what the library expects.)
    # (Yes, I'm assigning a lambda to a variable, like I said never to do.)
    mnist.temporary_dir = lambda: '/tmp'

    # Each of these functions first downloads the data and returns a numpy array.
    # We call .tolist() because our "tensors" are just lists.
    train_images = mnist.train_images().tolist()
    train_labels = mnist.train_labels().tolist()

    assert shape(train_images) == [60000, 28, 28]
    assert shape(train_labels) == [60000]

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(10, 10)

    for i in range(10):
        for j in range(10):
            # Plot each image in black and white and hide the axes.
            ax[i][j].imshow(train_images[10 * i + j], cmap='Greys')
            ax[i][j].xaxis.set_visible(False)
            ax[i][j].yaxis.set_visible(False)

    # Displaying the grid is left disabled in this version:
    # plt.show()

    # Load the MNIST test data
    test_images = mnist.test_images().tolist()
    test_labels = mnist.test_labels().tolist()

    assert shape(test_images) == [10000, 28, 28]
    assert shape(test_labels) == [10000]

    # Recenter the images

    # Compute the average pixel value (over the training images only)
    avg = tensor_sum(train_images) / 60000 / 28 / 28

    # Recenter, rescale, and flatten; the test images reuse the training average.
    train_images = [[(pixel - avg) / 256 for row in image for pixel in row]
                    for image in train_images]
    test_images = [[(pixel - avg) / 256 for row in image for pixel in row]
                   for image in test_images]

    assert shape(train_images) == [60000, 784], "images should be flattened"
    assert shape(test_images) == [10000, 784], "images should be flattened"

    # After centering, the training pixel values should sum to (almost) zero
    assert -0.0001 < tensor_sum(train_images) < 0.0001

    # One-hot encode the training and test labels
    train_labels = [one_hot_encode(label) for label in train_labels]
    test_labels = [one_hot_encode(label) for label in test_labels]

    assert shape(train_labels) == [60000, 10]
    assert shape(test_labels) == [10000, 10]

    # Training loop

    def loop(model: Layer,
             images: List[Tensor],
             labels: List[Tensor],
             loss: Loss,
             optimizer: Optional[Optimizer] = None) -> None:
        """Run `model` over every image once, showing running loss/accuracy.

        With an `optimizer` the model is trained (one step per image);
        with ``optimizer=None`` the pass only evaluates.
        """
        correct = 0         # Track number of correct predictions.
        total_loss = 0.0    # Track total loss.

        with tqdm.trange(len(images)) as t:
            for i in t:
                predicted = model.forward(images[i])             # Predict.
                if argmax(predicted) == argmax(labels[i]):       # Check for
                    correct += 1                                 # correctness.
                total_loss += loss.loss(predicted, labels[i])    # Compute loss.

                # If we're training, backpropagate gradient and update weights.
                if optimizer is not None:
                    gradient = loss.gradient(predicted, labels[i])
                    model.backward(gradient)
                    optimizer.step(model)

                # And update our metrics in the progress bar.
                avg_loss = total_loss / (i + 1)
                acc = correct / (i + 1)
                t.set_description(f"mnist loss: {avg_loss:.3f} acc: {acc:.3f}")

    # ------------------------------------------------------------------
    # The logistic regression model for MNIST
    # ------------------------------------------------------------------

    random.seed(0)

    # Logistic regression is just a linear layer followed by softmax
    model = Linear(784, 10)
    loss = SoftmaxCrossEntropy()

    # This optimizer seems to work
    optimizer = Momentum(learning_rate=0.01, momentum=0.99)

    # Train on the training data
    loop(model, train_images, train_labels, loss, optimizer)

    # Test on the test data (no optimizer means just evaluate)
    loop(model, test_images, test_labels, loss)

    # ------------------------------------------------------------------
    # A deep neural network for MNIST
    # ------------------------------------------------------------------

    random.seed(0)

    # Name them so we can turn train on and off
    dropout1 = Dropout(0.1)
    dropout2 = Dropout(0.1)

    model = Sequential([
        Linear(784, 30),  # Hidden layer 1: size 30
        dropout1,
        Tanh(),
        Linear(30, 10),   # Hidden layer 2: size 10
        dropout2,
        Tanh(),
        Linear(10, 10)    # Output layer: size 10
    ])

    # Training the deep model for MNIST

    optimizer = Momentum(learning_rate=0.01, momentum=0.99)
    loss = SoftmaxCrossEntropy()

    # Enable dropout and train (takes > 20 minutes on my laptop!)
    dropout1.train = dropout2.train = True
    loop(model, train_images, train_labels, loss, optimizer)

    # Disable dropout and evaluate
    dropout1.train = dropout2.train = False
    loop(model, test_images, test_labels, loss)
def main():
    """Run the demos in sequence: XOR, FizzBuzz (two losses), then MNIST.

    Every section reseeds ``random`` with 0 before building its model so the
    runs are repeatable. Progress is reported through ``tqdm`` progress bars
    and a few ``print`` calls, and the MNIST sample grid is shown with
    ``plt.show()``; nothing is returned.

    NOTE(review): an earlier ``def main()`` exists in this file; this later
    definition rebinds the name and is the one that remains after import.
    """
    from typing import Optional

    import tqdm

    def train_epoch(net: Layer,
                    xs: List[Tensor],
                    ys: List[Tensor],
                    loss: Loss,
                    optimizer: Optimizer) -> float:
        """Make one pass over the (x, y) pairs and return the summed loss.

        For every example: forward pass, accumulate the loss, backpropagate
        the loss gradient, then take one optimizer step.
        """
        epoch_loss = 0.0
        for x, y in zip(xs, ys):
            predicted = net.forward(x)
            epoch_loss += loss.loss(predicted, y)
            gradient = loss.gradient(predicted, y)
            net.backward(gradient)
            optimizer.step(net)
        return epoch_loss

    # ------------------------------------------------------------------
    # Another take on the XOR gate
    # ------------------------------------------------------------------

    # Training data: the four XOR input pairs and their targets.
    xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    ys = [[0.], [1.], [1.], [0.]]

    random.seed(0)

    net = Sequential([
        Linear(input_dim=2, output_dim=2),
        Sigmoid(),
        Linear(input_dim=2, output_dim=1)
    ])

    optimizer = GradientDescent(learning_rate=0.1)
    loss = SSE()

    with tqdm.trange(3000) as t:
        for _ in t:
            epoch_loss = train_epoch(net, xs, ys, loss, optimizer)
            t.set_description(f"xor loss {epoch_loss:.3f}")

    for param in net.params():
        print(param)

    # ------------------------------------------------------------------
    # Another take on the Fizz Buzz game
    # ------------------------------------------------------------------

    from scratch.neural_networks import binary_encode, fizz_buzz_encode, argmax

    # Train on 101..1023; 1..100 is held out as the test set below.
    xs = [binary_encode(n) for n in range(101, 1024)]
    ys = [fizz_buzz_encode(n) for n in range(101, 1024)]

    NUM_HIDDEN = 25

    random.seed(0)

    net = Sequential([
        Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
        Tanh(),
        Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform'),
        Sigmoid()
    ])

    # Defined locally so it closes over the encoders/argmax imported above.
    def fizzbuzz_accuracy(low: int, hi: int, net: Layer) -> float:
        """Fraction of n in range(low, hi) whose predicted class is correct."""
        num_correct = 0
        for n in range(low, hi):
            x = binary_encode(n)
            predicted = argmax(net.forward(x))
            actual = argmax(fizz_buzz_encode(n))
            if predicted == actual:
                num_correct += 1

        return num_correct / (hi - low)

    optimizer = Momentum(learning_rate=0.1, momentum=0.9)
    loss = SSE()

    with tqdm.trange(1000) as t:
        for _ in t:
            epoch_loss = train_epoch(net, xs, ys, loss, optimizer)
            accuracy = fizzbuzz_accuracy(101, 1024, net)
            t.set_description(f"fb loss: {epoch_loss:.2f} acc: {accuracy:.2f}")

    # Now check the results on the test set
    print("test results", fizzbuzz_accuracy(1, 101, net))

    # Same architecture again, but trained with softmax cross-entropy.
    random.seed(0)

    net = Sequential([
        Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
        Tanh(),
        Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform')
        # No final sigmoid layer this time
    ])

    optimizer = Momentum(learning_rate=0.1, momentum=0.9)
    loss = SoftmaxCrossEntropy()

    with tqdm.trange(100) as t:
        for _ in t:
            epoch_loss = train_epoch(net, xs, ys, loss, optimizer)
            accuracy = fizzbuzz_accuracy(101, 1024, net)
            t.set_description(f"fb loss: {epoch_loss:.3f} acc: {accuracy:.2f}")

    # Check the results on the test set again
    print("test results", fizzbuzz_accuracy(1, 101, net))

    # ------------------------------------------------------------------
    # Download the MNIST data
    # ------------------------------------------------------------------

    import mnist

    # This downloads the data. Change the path to wherever you want it.
    # (Yes, it's a function with no arguments; that's what the library expects.)
    # (Yes, I'm assigning a lambda to a variable even though I said not to.)
    mnist.temporary_dir = lambda: '/tmp'

    # Each of these functions downloads the data and returns a numpy array.
    # We call .tolist() because our "tensors" are really just lists.
    train_images = mnist.train_images().tolist()
    train_labels = mnist.train_labels().tolist()

    assert shape(train_images) == [60000, 28, 28]
    assert shape(train_labels) == [60000]

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(10, 10)

    for i in range(10):
        for j in range(10):
            # Show each image in black and white and hide the axes.
            ax[i][j].imshow(train_images[10 * i + j], cmap='Greys')
            ax[i][j].xaxis.set_visible(False)
            ax[i][j].yaxis.set_visible(False)

    plt.show()

    # Load the MNIST test data
    test_images = mnist.test_images().tolist()
    test_labels = mnist.test_labels().tolist()

    assert shape(test_images) == [10000, 28, 28]
    assert shape(test_labels) == [10000]

    # Recenter the images

    # Compute the average pixel value (over the training images only)
    avg = tensor_sum(train_images) / 60000 / 28 / 28

    # Recenter, rescale, and flatten; the test images reuse the training average.
    train_images = [[(pixel - avg) / 256 for row in image for pixel in row]
                    for image in train_images]
    test_images = [[(pixel - avg) / 256 for row in image for pixel in row]
                   for image in test_images]

    assert shape(train_images) == [60000, 784], "obrazy powinny być spłaszczone"
    assert shape(test_images) == [10000, 784], "obrazy powinny być spłaszczone"

    # After centering, the training pixel values should sum to (almost) zero
    assert -0.0001 < tensor_sum(train_images) < 0.0001

    # One-hot encode the training and test labels
    train_labels = [one_hot_encode(label) for label in train_labels]
    test_labels = [one_hot_encode(label) for label in test_labels]

    assert shape(train_labels) == [60000, 10]
    assert shape(test_labels) == [10000, 10]

    # Training loop

    def loop(model: Layer,
             images: List[Tensor],
             labels: List[Tensor],
             loss: Loss,
             optimizer: Optional[Optimizer] = None) -> None:
        """Run `model` over every image once, showing running loss/accuracy.

        With an `optimizer` the model is trained (one step per image);
        with ``optimizer=None`` the pass only evaluates.
        """
        correct = 0         # Number of correct predictions so far.
        total_loss = 0.0    # Total loss so far.

        with tqdm.trange(len(images)) as t:
            for i in t:
                predicted = model.forward(images[i])             # Predict.
                if argmax(predicted) == argmax(labels[i]):       # Check
                    correct += 1                                 # correctness.
                total_loss += loss.loss(predicted, labels[i])    # Compute loss.

                # When training, backpropagate the gradient and update weights.
                if optimizer is not None:
                    gradient = loss.gradient(predicted, labels[i])
                    model.backward(gradient)
                    optimizer.step(model)

                # Update the metrics shown in the progress bar.
                avg_loss = total_loss / (i + 1)
                acc = correct / (i + 1)
                t.set_description(f"mnist loss: {avg_loss:.3f} acc: {acc:.3f}")

    # ------------------------------------------------------------------
    # Logistic regression model on the MNIST data
    # ------------------------------------------------------------------

    random.seed(0)

    # Logistic regression is just a linear layer combined with softmax
    model = Linear(784, 10)
    loss = SoftmaxCrossEntropy()

    # This optimizer seems to work well
    optimizer = Momentum(learning_rate=0.01, momentum=0.99)

    # Train the model on the training data
    loop(model, train_images, train_labels, loss, optimizer)

    # Evaluate on the test data (hence no optimizer)
    loop(model, test_images, test_labels, loss)

    # ------------------------------------------------------------------
    # Deep neural network on the MNIST data
    # ------------------------------------------------------------------

    random.seed(0)

    # Name them so that we can switch them on and off.
    dropout1 = Dropout(0.1)
    dropout2 = Dropout(0.1)

    model = Sequential([
        Linear(784, 30),  # Hidden layer 1: size 30
        dropout1,
        Tanh(),
        Linear(30, 10),   # Hidden layer 2: size 10
        dropout2,
        Tanh(),
        Linear(10, 10)    # Output layer: size 10
    ])

    # Training the deep model on the MNIST data

    optimizer = Momentum(learning_rate=0.01, momentum=0.99)
    loss = SoftmaxCrossEntropy()

    # Enable dropout and train (this took over 20 minutes on my laptop!)
    dropout1.train = dropout2.train = True
    loop(model, train_images, train_labels, loss, optimizer)

    # Disable dropout and evaluate
    dropout1.train = dropout2.train = False
    loop(model, test_images, test_labels, loss)