# coding: utf-8
'''Train the two-layer network and, as the parameters are updated, display the
recognition accuracy on the training data and on the test data.
Both values can be seen to increase and to track each other closely,
which shows that no overfitting occurs.
'''
import sys, os
sys.path.append(os.pardir)  # so that files in the parent directory can be imported
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Hyperparameters
iters_num = 10000  # number of gradient-descent iterations, i.e. parameter updates
train_size = x_train.shape[0]  # 60000
batch_size = 100
learning_rate = 0.1

train_loss_list = []  # loss value after every parameter update
train_acc_list = []   # training-set accuracy, computed once per epoch
test_acc_list = []    # same, but for the test data

# Number of updates needed to "see" all of the training data once (600 here).
# An epoch is one unit: the number of updates in which every training sample
# has been used once.
iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    # Get a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient and update the parameters
    grad = network.gradient(x_batch, t_batch)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    train_loss_list.append(network.loss(x_batch, t_batch))

    # Record accuracy once per epoch
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))
import numpy as np
import sys, os
sys.path.append("D:\\dev\\projects\\deep-learning-from-scratch")
sys.path.append("D:\\dev\\projects\\deep-learning-from-scratch\\ch04")
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
print(net.params['W1'].shape)  # (784, 100)
print(net.params['b1'].shape)  # (100,)
print(net.params['W2'].shape)  # (100, 10)
print(net.params['b2'].shape)  # (10,)

# Dummy inputs and labels, just to exercise the gradient computation
x = np.random.rand(100, 784)
y = net.predict(x)
t = np.random.rand(100, 10)

grads = net.numerical_gradient(x, t)  # numerical differentiation: slow
print(grads['W1'].shape)  # (784, 100)
print(grads['b1'].shape)  # (100,)
print(grads['W2'].shape)  # (100, 10)
print(grads['b2'].shape)  # (10,)
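All of these scripts import a `two_layer_net.TwoLayerNet`. For reference, here is a minimal sketch of that class in the style of "Deep Learning from Scratch" (ch04), assuming the book's `common.functions` and `common.gradient` helpers are available; the actual class in each project may differ in detail.

import numpy as np
from common.functions import sigmoid, softmax, cross_entropy_error
from common.gradient import numerical_gradient

class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Parameters live in a dict so training loops can iterate over keys.
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

    def predict(self, x):
        a1 = np.dot(x, self.params['W1']) + self.params['b1']
        z1 = sigmoid(a1)
        a2 = np.dot(z1, self.params['W2']) + self.params['b2']
        return softmax(a2)

    def loss(self, x, t):
        return cross_entropy_error(self.predict(x), t)

    def accuracy(self, x, t):
        y = np.argmax(self.predict(x), axis=1)
        t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        loss_w = lambda w: self.loss(x, t)
        return {key: numerical_gradient(loss_w, self.params[key])
                for key in ('W1', 'b1', 'W2', 'b2')}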
import numpy as np
from common.optimizer import SGD
from dataset import spiral
import matplotlib.pyplot as plt
from two_layer_net import TwoLayerNet

# Hyperparameter settings
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

# Load the data, create the model and the optimizer.
# x: (300, 2) -- 300 training samples with 2 features each
# t: (300, 3) -- one-hot labels over 3 classes
x, t = spiral.load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

# Variables used during training
data_size = len(x)
max_iters = data_size // batch_size  # floor division
total_loss = 0
loss_count = 0
loss_list = []

for epoch in range(max_epoch):
    # Shuffle the data: permute the indices 0 .. data_size-1,
    # then reindex the data with the shuffled indices
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]

    for iters in range(max_iters):
        batch_x = x[iters * batch_size:(iters + 1) * batch_size]
        batch_t = t[iters * batch_size:(iters + 1) * batch_size]

        # Compute the gradients and update the parameters
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)

        total_loss += loss
        loss_count += 1
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient
    # grad = network.numerical_gradient(x_batch, t_batch)  # numerical differentiation (slow)
    grad = network.gradient(x_batch, t_batch)  # backpropagation (fast)

    # Update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    train_loss_list.append(network.loss(x_batch, t_batch))
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # so that files in the parent directory can be imported
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000  # set the number of iterations as appropriate
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient
    grad = network.numerical_gradient(x_batch, t_batch)
    # grad = network.gradient(x_batch, t_batch)  # faster backpropagation version

    # Update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    train_loss_list.append(network.loss(x_batch, t_batch))

    # Evaluate once per epoch
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))
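The script imports matplotlib but the surviving fragment never plots; a typical follow-up, assuming the accuracy lists populated in the loop above, would be:

# Plot the per-epoch accuracy curves (sketch)
x_axis = np.arange(len(train_acc_list))
plt.plot(x_axis, train_acc_list, label='train acc')
plt.plot(x_axis, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()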
# (fragment: tail of a CreateData() function; the leading lines sit inside an
#  outer if/else that flips the sign of x, and each branch randomly flips y)
            t = np.random.randint(2, size=1)[0]
            if t % 2 == 0:
                data.append(Point(int(-x[i]), int(-y[i])))
            else:
                data.append(Point(int(-x[i]), int(y[i])))
        else:
            t = np.random.randint(2, size=1)[0]
            if t % 2 == 0:
                data.append(Point(int(x[i]), int(-y[i])))
            else:
                data.append(Point(int(x[i]), int(y[i])))
    return data


if __name__ == '__main__':
    net = TwoLayerNet(2, 3, 3)
    data = CreateData(2000)

    # Plot the generated points, colored by class
    for i in data:
        if GetMaxIndex(i.kind) == 0:
            plt.plot(i.x, i.y, 'o-g')
        elif GetMaxIndex(i.kind) == 1:
            plt.plot(i.x, i.y, 'o-b')
        else:
            plt.plot(i.x, i.y, 'o-r')
    plt.show()

    # Train on random mini-batches drawn from the generated data
    for i in range(5000):
        x_batch, start, end = random_mat(data)
        t_batch = get_kind(data, start, end)
        grad = net.gradient(x_batch, t_batch)
import numpy as np
import affine
from two_layer_net import TwoLayerNet
import optimizer as op

(images_train, labels_train), (imags_test, labels_test) = affine.process()

accuracy_list = []
iters_num = 1000
train_size = images_train.shape[0]
batch_size = 100
# rate = 0.1  -- no longer used; each optimizer carries its own learning rate

# Network initialization: when there are several variants to compare, a dict
# is convenient. One network and one loss history per optimizer, so that the
# optimizers can be compared on the same task.
optimizer = dict()
train_loss = dict()
network = dict()

optimizer['sgd'] = op.Sgd()
optimizer['adaGrad'] = op.AdaGrad()
optimizer['momentu'] = op.Momentu()

for key in optimizer.keys():
    network[key] = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
    train_loss[key] = []

# The optimizers are trained one after another, so each sees a different
# random sequence of batches; with this much data the statistical comparison
# still shows the differences.
for key in optimizer.keys():
    for i in range(iters_num):
        batch_mask = np.random.choice(train_size, batch_size)  # random sample of the training set
        x_batch = images_train[batch_mask]
        t_batch = labels_train[batch_mask]

        grads = network[key].gradient(x_batch, t_batch)
        optimizer[key].update(network[key].param, grads)

        loss_value = network[key].loss(x_batch, t_batch)
        train_loss[key].append(loss_value)
from two_layer_net import TwoLayerNet

net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
print(net.params['W1'].shape)  # (784, 100)
print(net.params['b1'].shape)  # (100,)
print(net.params['W2'].shape)  # (100, 10)
print(net.params['b2'].shape)  # (10,)
import numpy as np
import matplotlib.pyplot as plt
from two_layer_net import TwoLayerNet
# sigmoid/softmax are assumed to come from the book's common.functions module
from common.functions import sigmoid, softmax


class TwoLayerPerceptron:
    def __init__(self, input_size=2, hidden_size=15, output_size=1,
                 train_size=5, batch_size=3, iter_nums=500, learning_rate=0.5):
        """
        Keyword arguments:
        input_size    -- number of input variables
        hidden_size   -- size of the hidden layer
        output_size   -- number of output values
        train_size    -- number of training inputs
        batch_size    -- number of training batches
        iter_nums     -- number of iterations
        learning_rate -- learning rate
        """
        self.network = TwoLayerNet(input_size, hidden_size, output_size)
        self.train_size = train_size
        self.batch_size = batch_size
        self.iter_nums = iter_nums
        self.learning_rate = learning_rate
        self.accuracy_list = []

    # Train the weights.
    # x is a 2-D array, t an array of labels.
    def train(self, x, t):
        for i in range(self.iter_nums):
            grad = self.network.numerical_gradient(x, t)
            for key in ('W1', 'b1', 'W2', 'b2'):
                # gradient-descent step; plain '=' here would overwrite the
                # weights with the scaled gradient instead of updating them
                self.network.params[key] -= self.learning_rate * grad[key]
            if i % 10 == 0:
                acc = self.accuracy(x, t)
                self.accuracy_list.append(acc)

    # Compute the predicted values
    def predict(self, x):
        params = self.network.params
        W1, b1 = params['W1'], params['b1']
        W2, b2 = params['W2'], params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y

    # Accuracy
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # Plot the accuracy curve
    def draw(self):
        x = np.arange(len(self.accuracy_list))
        plt.plot(x, self.accuracy_list, label="train acc")
        plt.xlabel("Iteration Count")
        plt.ylabel("Accuracy")
        plt.ylim(0, 1.0)
        plt.legend(loc="lower right")
        plt.show()
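A minimal usage sketch; the XOR-style data and the parameter choices here are hypothetical, not from the original:

import numpy as np

# Two inputs, two classes, one-hot targets; output_size=2 instead of the
# default 1 so that the argmax-based accuracy is meaningful.
x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
t = np.array([[1, 0], [0, 1], [0, 1], [1, 0]], dtype=float)

p = TwoLayerPerceptron(input_size=2, hidden_size=15, output_size=2,
                       iter_nums=500, learning_rate=0.5)
p.train(x, t)
print(p.predict(x))
p.draw()  # plots the accuracy recorded every 10 iterations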
    t_train.append(i[1])

"""
for i in database:
    x_train.append(i[0].flatten())
    target = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])
    target[i[1]] = 1
    t_train.append(target)
"""

train_loss_list = []

# Hyperparameters
learning_rate = 0.6   # learning rate (step size of each weight update)
batch_size = 10       # number of samples used per training step
iters_num = 8000      # number of training iterations
train_size = len(x_train)

network = TwoLayerNet(input_size=27, hidden_size=200, output_size=9)

all_loss = 0
total = 0
for i in range(iters_num):
    x_batch = []
    t_batch = []
    batch_mask = np.random.choice(train_size, batch_size)
    for j in batch_mask:
        x = x_train[j]
        t = t_train[j]
        # Data augmentation: random rotation (clock) and mirroring
        clock = random.randint(0, 3)
        mirror = random.randint(1, 2)
        x = shuffle(x, clock, mirror)
        t = shuffle_move(t, clock, mirror)
optimizers = OrderedDict()
optimizers["SGD"] = SGD()
optimizers["Momentum"] = Momentum()
optimizers["AdaGrad"] = AdaGrad()
optimizers["Adam"] = Adam()

inters_num = 2000
train_size = x_train.shape[0]
batch_size = 100

markers = {"SGD": "o", "Momentum": "x", "AdaGrad": "s", "Adam": "D"}
train_loss_list = {}

# Train one fresh network per optimizer and record its loss curve.
for key in optimizers:
    net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
    optimizer = optimizers[key]
    train_loss_list[key] = []

    for i in range(inters_num):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        y_batch = y_train[batch_mask]

        grads = net.gradient(x_batch, y_batch)
        optimizer.update(net.params, grads)

        loss = net.loss(x_batch, y_batch)
        train_loss_list[key].append(loss)

    x = np.arange(inters_num)
    plt.plot(x, train_loss_list[key], marker=markers[key], markevery=100, label=key)

plt.xlabel("iterations")
plt.ylabel("loss")
plt.legend()
plt.show()
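For reference, minimal sketches of the four update rules being compared, in the style of the book's `common/optimizer.py`; the real module may differ in defaults and details:

import numpy as np

class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr
    def update(self, params, grads):
        for key in params:
            params[key] -= self.lr * grads[key]

class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr, self.momentum, self.v = lr, momentum, None
    def update(self, params, grads):
        if self.v is None:
            self.v = {k: np.zeros_like(v) for k, v in params.items()}
        for key in params:
            # velocity accumulates past gradients and carries the update along
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]

class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr, self.h = lr, None
    def update(self, params, grads):
        if self.h is None:
            self.h = {k: np.zeros_like(v) for k, v in params.items()}
        for key in params:
            # per-parameter learning rate shrinks as squared gradients accumulate
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)

class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr, self.beta1, self.beta2 = lr, beta1, beta2
        self.iter, self.m, self.v = 0, None, None
    def update(self, params, grads):
        if self.m is None:
            self.m = {k: np.zeros_like(v) for k, v in params.items()}
            self.v = {k: np.zeros_like(v) for k, v in params.items()}
        self.iter += 1
        # bias-corrected step size
        lr_t = self.lr * np.sqrt(1.0 - self.beta2 ** self.iter) / (1.0 - self.beta1 ** self.iter)
        for key in params:
            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
            self.v[key] += (1 - self.beta2) * (grads[key] ** 2 - self.v[key])
            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)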
import numpy as np
from common.optimizer import SGD
from dataset import spiral
import matplotlib.pyplot as plt
from two_layer_net import TwoLayerNet

# Hyperparameter settings
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

# Load the data, create the model and the optimizer
x, t = spiral.load_data()
model = TwoLayerNet(input_size=x.shape[1], hidden_size=hidden_size, output_size=t.shape[1])
optimizer = SGD(lr=learning_rate)

# Variables used during training
data_size = len(x)
max_iters = data_size // batch_size
total_loss = 0
loss_count = 0
loss_list = []

for epoch in range(max_epoch):
    # Shuffle the data
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]
import numpy as np
# The following imports are implied by the calls below (Keras-style MNIST loading):
from keras.datasets import mnist
from keras.utils import to_categorical
from two_layer_net import TwoLayerNet
from common.trainer import Trainer

# Load the data
(x_train, t_train), (x_test, t_test) = mnist.load_data()

# Flatten to 1-D vectors
x_train, x_test = x_train.reshape(-1, 784), x_test.reshape(-1, 784)

# Normalize to [0, 1]
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

# One-hot vectors
t_train, t_test = to_categorical(t_train), to_categorical(t_test)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

max_epochs = 1000
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=max_epochs, mini_batch_size=100,
                  optimizer='SGD', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()
grads['b2'].shape

#%% Implementing mini-batch learning
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(784, 50, 10)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.numerical_gradient(x_batch, t_batch)

    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# load data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ':' + str(diff))
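If backpropagation is implemented correctly, the printed averages should be vanishingly small (on the order of 1e-10 or less). A relative-error metric is sometimes more informative when gradient magnitudes vary widely; a sketch, where the helper `rel_error` is hypothetical:

import numpy as np

def rel_error(a, b, eps=1e-8):
    # element-wise relative error, guarded against division by zero
    return np.max(np.abs(a - b) / np.maximum(np.abs(a) + np.abs(b), eps))

for key in grad_numerical.keys():
    print(key + ' relative error: ' +
          str(rel_error(grad_backprop[key], grad_numerical[key])))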
import numpy as np
from keras.datasets import cifar10
from keras.utils import to_categorical
from two_layer_net import TwoLayerNet  # implied by the usage below

np.random.seed(10)

(x_train, t_train), (x_test, t_test) = cifar10.load_data()
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
t_train = to_categorical(t_train)
t_test = to_categorical(t_test)

# Flatten the 32x32x3 images to vectors
x_test = x_test.reshape(-1, 32 * 32 * 3)
x_train = x_train.reshape(-1, 32 * 32 * 3)

network = TwoLayerNet(input_size=x_train.shape[1], hidden_size=200,
                      output_size=t_train.shape[1])

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.2

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.gradient(x_batch, t_batch)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    train_loss_list.append(network.loss(x_batch, t_batch))
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    # print('progress ...' + str(i))

    # Get a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient
    grad = network.gradient(x_batch, t_batch)

    # Update the parameters; plain '=' here would overwrite the weights with
    # the scaled gradient instead of taking a descent step
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the MNIST data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000  # number of training iterations
train_size = x_train.shape[0]
print(x_train.shape[0])
batch_size = 100   # number of inputs used per training iteration
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Size of one epoch, so that accuracy can be computed/stored/printed once per epoch
iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    # Draw batch_size random indices from 0..59,999
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]  # take those indices from the training set
    t_batch = t_train[batch_mask]  # take the same indices from the labels

    # grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)  # backpropagation version

    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
import numpy as np
from sklearn import preprocessing  # implied by preprocessing.StandardScaler below
from two_layer_net import TwoLayerNet

# Standardize x
sscaler_x = preprocessing.StandardScaler()
sscaler_x.fit(x_train)
x_train = sscaler_x.transform(x_train)
x_test = sscaler_x.transform(x_test)

# Keep the original y test data for evaluation
y_test_original = y_test.reshape(-1, 1)

# Standardize y
sscaler_y = preprocessing.StandardScaler()
sscaler_y.fit(y_train.reshape(-1, 1))
y_train = sscaler_y.transform(y_train.reshape(-1, 1))
y_test = sscaler_y.transform(y_test.reshape(-1, 1))

network = TwoLayerNet(input_size=13, hidden_size=50, output_size=1)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    y_batch = y_train[batch_mask]

    # Gradient
    grad = network.gradient(x_batch, y_batch)

    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    train_loss_list.append(network.loss(x_batch, y_batch))
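Because the targets were standardized, predictions come back in standardized units. A sketch of evaluating on the original scale, assuming the network exposes a `predict` method like the book's class (`StandardScaler.inverse_transform` is standard scikit-learn API):

# Map standardized predictions back to the original target scale
y_pred = network.predict(x_test)                   # shape (N, 1), standardized units
y_pred_original = sscaler_y.inverse_transform(y_pred)

# Mean squared error against the unscaled targets saved earlier
mse = np.mean((y_pred_original - y_test_original) ** 2)
print("test MSE (original scale):", mse)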
from PIL import Image
import os, sys
import numpy as np
import scipy.misc
from files.machine_funcs import load_datas
from two_layer_net import TwoLayerNet
import pickle as pkl

filelist = os.listdir("files/trainning/")
x_train, t_train, x_test, t_test = load_datas()
print(np.shape(x_train), np.shape(t_train))

network = TwoLayerNet(input_size=160, hidden_size=10, output_size=11)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.01

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = 1000  # max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.gradient(x_batch, t_batch)

    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    train_loss_list.append(network.loss(x_batch, t_batch))
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(784, 50, 10)

iters = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
iter_epoch = max(train_size // batch_size, 1)

train_loss_list = []
train_acc_list = []
test_acc_list = []

for i in range(iters):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.gradient(x_batch, t_batch)
    for key in ("W1", "b1", "W2", "b2"):
        network.params[key] -= learning_rate * grad[key]
import numpy as np
import affine
from two_layer_net import TwoLayerNet
import matplotlib.pyplot as plot
import optimizer as op

(images_train, labels_train), (imags_test, labels_test) = affine.process()

train_loss_list = []
accuracy_list = []
iters_num = 1000
train_size = images_train.shape[0]
batch_size = 100
rate = 0.01

# Network initialization
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
optimizer = op.Sgd()

for i in range(iters_num):
    # Randomly pick a small subset of the samples
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = images_train[batch_mask]  # the square brackets index into the array
    t_batch = labels_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.param, grads)

    loss_value = network.loss(x_batch, t_batch)
    # print(loss_value)
    train_loss_list.append(loss_value)
    accuracy_list.append(network.accuracy(x_batch, t_batch))
    # Observation: accuracy stays at chance level and never improves

y = np.array(accuracy_list)
x = range(len(accuracy_list))
plot.plot(x, y)
plot.show()
def output_neuralnet(train_num=60000, epoch_num=1000, hidden_num=300, batch_size=100,
                     learning_rate=0.1, data_num=1, ratechange=False,
                     samecompare_epoch=0, comparevalues=False, seed=0):
    see_weight = False
    see_acc = True
    data2 = 2
    random_change = True
    random.seed(seed)

    # Load the data
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
    print(x_train.shape)
    x_train = x_train[:train_num]
    t_train = t_train[:train_num]
    train_size = x_train.shape[0]

    # Randomize the training labels
    if random_change:
        for i in range(train_size):
            a = random.randrange(10)
            for j in range(10):
                if j == a:
                    t_train[i][j] = 1
                else:
                    t_train[i][j] = 0

    # Lists for displaying selected weights
    if see_weight:
        w11 = []
        w12 = []
        w21 = []
        w22 = []

    network = TwoLayerNet(input_size=784, hidden_size=hidden_num, output_size=10)

    train_loss_list = []
    train_acc_list = []
    test_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    iters_num = int(iter_per_epoch * epoch_num)

    for i in range(iters_num):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Gradient
        # grad = network.numerical_gradient(x_batch, t_batch)
        grad = network.gradient(x_batch, t_batch)

        # Update
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)

        # Halve the learning rate midway through training
        if ratechange and (i > iters_num / 2):
            learning_rate = learning_rate / 2
            ratechange = False

        # Record weights for display
        if see_weight:
            if i < iters_num / 2:
                w11.append(network.params['W1'][300][5])
                w12.append(network.params['W1'][300][6])
            else:
                w21.append(network.params['W1'][300][5])
                w22.append(network.params['W1'][300][6])

        if i % (iter_per_epoch * 100) == 0:
            # When samecompare_epoch is set, a second output file has to be written.
            if samecompare_epoch > 0 and i / iter_per_epoch == samecompare_epoch:
                y = network.predict(x_test)
                y = np.argmax(y, axis=1)
                f = open('data' + str(data2) + '.txt', 'w')
                count = 0
                # the loop variable must not be 'i', which would clobber the
                # outer iteration counter
                for pred in y:
                    f.write(str(pred) + "\n")
                    count += 1
                f.write("count:" + str(count))
                f.close()
            if see_acc:
                train_acc = network.accuracy(x_train, t_train)
                test_acc = network.accuracy(x_test, t_test)
                train_acc_list.append(train_acc)
                test_acc_list.append(test_acc)
                print('{0} : {1:.4f} {2:.4f}'.format(int(i / iter_per_epoch),
                                                     train_acc, test_acc))

    y = network.predict(x_test)
    # When comparing raw values, flatten; otherwise take the argmax class
    if comparevalues:
        y = y.reshape(-1, )
    else:
        y = np.argmax(y, axis=1)
    f = open('data' + str(data_num) + '.txt', 'w')
    count = 0
    for pred in y:
        f.write(str(pred) + "\n")
        count += 1
    f.close()

    if see_weight:
        plt.plot(w11, w12, "ro")
        plt.plot(w21, w22, "o")
        plt.show()
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Hyperparameters
iters_num = 10000  # number of iterations
train_size = x_train.shape[0]
batch_size = 100   # mini-batch size
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)  # iterations per epoch

for i in range(iters_num):
    # Get a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient
    grad = network.gradient(x_batch, t_batch)
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# hyper params
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

nw = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    print(str(i) + "/" + str(iters_num))
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # grad = nw.numerical_gradient(x_batch, t_batch)
    grad = nw.gradient(x_batch, t_batch)

    for key in ('W1', 'b1', 'W2', 'b2'):
        nw.params[key] -= learning_rate * grad[key]

    loss = nw.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    def setUp(self):
        self.two_layer_net = TwoLayerNet(2, 4, 3)
        self.x = np.random.randn(4, 2)
        self.t = np.random.randn(4, 3)
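A test method to pair with this fixture, as a sketch only: it assumes the ch04-style API (a `params` dict and `numerical_gradient`); adjust to whatever interface the class under test actually exposes.

    def test_gradient_shapes(self):
        # Hypothetical test: each gradient should match its parameter's shape.
        grads = self.two_layer_net.numerical_gradient(self.x, self.t)
        for key in ('W1', 'b1', 'W2', 'b2'):
            self.assertEqual(grads[key].shape,
                             self.two_layer_net.params[key].shape)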
import sys, os
sys.path.append(os.pardir)
from common.trainer import Trainer
from common.optimizer import SGD
from dataset import spiral
from two_layer_net import TwoLayerNet

max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

x, t = spiral.load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

trainer = Trainer(model, optimizer)
trainer.fit(x, t, max_epoch=max_epoch, batch_size=batch_size, eval_interval=10)
trainer.plot()
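`Trainer.fit` wraps the same loop the spiral scripts above spell out by hand; roughly, and only as a sketch of the equivalent steps rather than the library's exact code:

import numpy as np

# Roughly what trainer.fit(x, t, ...) does per epoch (sketch)
for epoch in range(max_epoch):
    idx = np.random.permutation(len(x))          # shuffle
    x, t = x[idx], t[idx]
    for iters in range(len(x) // batch_size):
        batch_x = x[iters * batch_size:(iters + 1) * batch_size]
        batch_t = t[iters * batch_size:(iters + 1) * batch_size]
        loss = model.forward(batch_x, batch_t)   # forward pass returns the loss
        model.backward()                         # fills model.grads
        optimizer.update(model.params, model.grads)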
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    # Get a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient
    grad = network.numerical_gradient(x_batch, t_batch)

    # Update the parameters; plain '=' here would replace the weights with the
    # scaled gradient instead of taking a descent step
    for key in ("W1", "b1", "W2", "b2"):
        network.params[key] -= learning_rate * grad[key]

    # Record the training progress
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
learning_rate = 0.1

# Full-batch gradient descent: every step uses all 60,000 training images,
# so each iteration is far slower than the mini-batch scripts above.
for i in range(10000):
    print('Try: ' + str(i + 1))
    grads = network.gradient(x_train, t_train)
    for key in network.params:
        network.params[key] -= learning_rate * grads[key]
    print('Accuracy: ' + str(network.accuracy(x_train, t_train)) + '\n')
# coding: utf-8
import sys, os
sys.path.append(os.getcwd())
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

learning_rate = 0.1
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# The Affine layer holds a reference to the same array as params["W1"],
# so rebinding the dict entry does not change what the layer sees.
print("before first layer params: ", network.layers['Affine1'].W[0])
network.params["W1"] = None
print("after first layer params: ", network.layers['Affine1'].W[0])

# iters_num = 10000
# train_size = x_train.shape[0]
# batch_size = 100
# learning_rate = 0.1
# batch_mask = np.random.choice(train_size, batch_size)
# x_batch = x_train[batch_mask]
# t_batch = t_train[batch_mask]
# # Gradient
# # grad = network.numerical_gradient(x_batch, t_batch)
# grad = network.gradient(x_batch, t_batch)
# print("before first layer params: ", network.layers['Affine2'].W[0])
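This is ordinary Python reference semantics, which is why the training loops above update weights in place with `-=` rather than rebinding; a standalone illustration with hypothetical names:

import numpy as np

params = {'W1': np.zeros(3)}
layer_W = params['W1']          # a layer keeps a reference to the same array

params['W1'] = None             # rebinds the dict entry only; layer_W is untouched
print(layer_W)                  # [0. 0. 0.]

params['W1'] = layer_W          # restore the shared reference
params['W1'] -= np.ones(3)      # in-place update: the layer sees the change
print(layer_W)                  # [-1. -1. -1.]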
import numpy as np
import sys, os
sys.path.append(os.pardir)
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.numerical_gradient(x_batch, t_batch)
    # grad = network.gradient(x_batch, t_batch)  # backpropagation version

    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # so that files in the parent directory can be imported
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# Average absolute error between the two gradients for each weight
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))
(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Iterations per epoch
iter_per_epoch = max(train_size / batch_size, 1)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    # Get a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient
    grad = network.numerical_gradient(x_batch, t_batch)
    # grad = network.gradient(x_batch, t_batch)  # fast version

    # Update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient
    # grad = network.numerical_gradient(x_batch, t_batch)  # numerical differentiation (slow)
    grad = network.gradient(x_batch, t_batch)  # backpropagation

    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    train_loss_list.append(network.loss(x_batch, t_batch))