# coding: utf-8
import os
import sys
sys.path.append(os.pardir)  # make dataset/ and ch05/ in the parent directory importable
import numpy as np
from dataset.mnist import load_mnist
from ch05.two_layer_net import TwoLayerNet


def gradient_check():
    (x_train, t_train), (x_test, t_test) = load_mnist(
        normalize=True, one_hot_label=True)
    network = TwoLayerNet(28 * 28, 50, 10)

    # sample a few training examples
    x_sample = x_train[:3]
    t_sample = t_train[:3]

    # gradient by numerical differentiation
    gradient_numerical = network.numerical_gradient(x_sample, t_sample)
    # gradient by backpropagation
    gradient_backpropagation = network.gradient(x_sample, t_sample)

    # mean absolute difference between the two gradients for each parameter
    for key in gradient_numerical.keys():
        diff = np.average(
            np.abs(gradient_numerical[key] - gradient_backpropagation[key]))
        print('{0}: {1}'.format(key, diff))
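# The mean absolute difference above depends on the scale of each parameter.
# A common alternative metric -- a hedged sketch, not part of the original
# script -- is the relative error, which normalizes by the gradient magnitudes
# so that all parameters are judged on the same scale.
def gradient_relative_error(grad_a, grad_b, eps=1e-8):
    """Relative error between two gradient arrays; values around 1e-7 or
    smaller usually indicate a correct backprop implementation."""
    return np.max(np.abs(grad_a - grad_b) /
                  np.maximum(np.abs(grad_a) + np.abs(grad_b), eps))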
def train(batch_size, iterate_num, learning_rate):
    """
    Fit the network parameters (weights, biases) by backpropagation.

    batch_size: number of samples drawn from the training data in each step
    iterate_num: number of gradient-descent iterations
    learning_rate: learning rate for the parameter updates
    """
    # get training data and test data (test data are used only for the accuracy reports)
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                      one_hot_label=True)

    # initialize TwoLayerNet; each image has 28*28 pixels
    network = TwoLayerNet(28 * 28, 50, 10)

    # loss in each step
    losses = []

    for i in range(iterate_num):
        # choose the training data for this step
        indices = np.random.choice(len(x_train), batch_size)
        x_train_batch = x_train[indices]
        t_train_batch = t_train[indices]

        # calculate the gradients by backpropagation
        # (network.numerical_gradient(...) gives the same result but is far slower)
        grads = network.gradient(x_train_batch, t_train_batch)

        # update the network parameters
        network.params['W1'] -= learning_rate * grads['W1']
        network.params['b1'] -= learning_rate * grads['b1']
        network.params['W2'] -= learning_rate * grads['W2']
        network.params['b2'] -= learning_rate * grads['b2']

        # record the loss
        loss = network.loss(x_train_batch, t_train_batch)
        print('loss = {0}'.format(loss))
        losses.append(loss)

        # report accuracy ten times over the whole run
        # (integer division; the original float division made the check unreliable)
        if i % max(iterate_num // 10, 1) == 0:
            print('train_acc = {0}'.format(network.accuracy(x_train, t_train)))
            print('test_acc = {0}'.format(network.accuracy(x_test, t_test)))

    return network, losses
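# A minimal usage sketch, not part of the original module: run the gradient
# check, then train and plot the loss curve. Assumes matplotlib is available;
# the hyperparameter values are the ones commonly used with this network.
if __name__ == '__main__':
    import matplotlib.pyplot as plt

    gradient_check()
    network, losses = train(batch_size=100, iterate_num=10000, learning_rate=0.1)

    plt.plot(losses)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.show()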
# coding: utf-8
import os
import sys
sys.path.append(os.pardir)  # make files in the parent directory importable
import numpy as np
from dataset.mnist import load_mnist
from ch05.two_layer_net import TwoLayerNet

# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# mean absolute difference between the two gradients for each parameter
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))
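# A hedged follow-up sketch, not in the original script: fail loudly if any
# difference exceeds a loose tolerance. With a correct backprop implementation
# the printed differences are tiny, typically on the order of 1e-10 or smaller
# (exact values vary between runs because the network is randomly initialized).
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    assert diff < 1e-7, 'gradient mismatch for {0}: {1}'.format(key, diff)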
import sys, os
sys.path.append(os.pardir)
import numpy as np
from ch05.two_layer_net import TwoLayerNet
from dataset.mnist import load_mnist
from common.functions import *
import pickle
from PIL import Image

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# load the trained parameters saved next to this file
filedir = os.path.dirname(os.path.realpath(__file__))
try:
    with open(os.path.join(filedir, 'params.pickle'), mode='rb') as f:
        network.refresh(pickle.load(f))
except FileNotFoundError:  # was a bare except, which also hides unrelated errors
    print('%s does not exist.' % 'params.pickle')


def predict(image):
    return softmax(network.predict(image))
    # return np.around(softmax(network.predict(image)), decimals=3)


# test
# (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)  # t: teacher
#
# one_of_x_test = x_test[20]
# one_of_t_test = t_test[20]
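# The params.pickle file loaded above has to be produced by a training run.
# A minimal sketch of the saving side, assuming network.refresh() accepts the
# same dict that network.params holds; save_params is a hypothetical helper,
# not part of the original file.
def save_params(network, path):
    with open(path, mode='wb') as f:
        pickle.dump(network.params, f)

# Example use of predict() on one flattened test image:
# print(predict(x_test[20]))  # probabilities for digits 0-9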
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
from collections import OrderedDict
from dataset.mnist import load_mnist
from ch05.two_layer_net import TwoLayerNet
from common.optimizer import SGD, Momentum, AdaGrad, RMSprop, Adam

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
train_size = x_train.shape[0]
iters_num = 2000  # assumed value; the snippet uses iters_num without showing it

batch_size = 100  # mini-batch size
learning_rate = 0.01

train_loss_list = {}
index_list = []
network = {}  # one network per optimizer

optimizer_dict = OrderedDict()
optimizer_dict['SGD'] = SGD()
optimizer_dict['Momentum'] = Momentum()
optimizer_dict['AdaGrad'] = AdaGrad()
optimizer_dict['RMSProp'] = RMSprop()
optimizer_dict['Adam'] = Adam()

for key in optimizer_dict.keys():
    train_loss_list[key] = []
    network[key] = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iter_per_epoch = max(train_size / batch_size, 1)  # iterations needed to cover one epoch

for i in range(0, iters_num):
    print('iteration %d' % i)

    # mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # let each optimizer take one step on its own network
    for key, optimizer in optimizer_dict.items():
        grads = network[key].gradient(x_batch, t_batch)
        optimizer.update(network[key].params, grads)
        train_loss_list[key].append(network[key].loss(x_batch, t_batch))
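# A hedged plotting sketch, not in the original snippet: compare the loss
# curves recorded above for the five optimizers. Assumes matplotlib is
# available and that train_loss_list[key] was filled in the training loop.
import matplotlib.pyplot as plt

for key in optimizer_dict.keys():
    plt.plot(train_loss_list[key], label=key)
plt.xlabel('iteration')
plt.ylabel('loss')
plt.legend()
plt.show()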
import sys, os
sys.path.append(os.path.dirname(os.path.abspath(__file__)) +
                '/deep-learning-from-scratch-master/')
import numpy as np
from dataset.mnist import load_mnist
from ch05.two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # gradient by backpropagation
    grad = network.gradient(x_batch, t_batch)

    # update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # record accuracy once per epoch
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)
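# A hedged follow-up sketch, not part of the original script: plot the
# per-epoch train/test accuracy recorded above. Assumes matplotlib is available.
import matplotlib.pyplot as plt

x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, linestyle='--', label='test acc')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()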