def __train(weight_init_std):
    bn_network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                     output_size=10, weight_init_std=weight_init_std, use_batchnorm=True)
    network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                  output_size=10, weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
def train(train_x, train_label, weight_init_std, learning_rate, max_epoch, batch_size):
    """Build one network with BatchNorm layers and one without, to test the effect of BatchNorm.

    :param train_x: training images
    :param train_label: training labels
    :param weight_init_std: weight initialization scheme
    :param learning_rate: learning rate for SGD
    :param max_epoch: number of epochs to run
    :param batch_size: mini-batch size
    :return: (train_acc_list, bn_train_acc_list)
    """
    bn_network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                     output_size=10, weight_init_std=weight_init_std, use_batchnorm=True)
    network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                  output_size=10, weight_init_std=weight_init_std, use_batchnorm=False)
    optimizer = SGD(learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    train_size = train_x.shape[0]
    iter_per_epoch = max(train_size / batch_size, 1)
    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)
    epoch_cnt = 0
    left = 0
    iteration = int(iter_per_epoch * max_epoch)

    for i in range(iteration):
        # Fetch one batch of data and advance the `left` cursor.
        batch_x, batch_label, left = get_batch(train_x, train_label, batch_mask, batch_size, left)

        # Update both networks on the same batch.
        for _network in (bn_network, network):
            grads = _network.gradient(batch_x, batch_label)
            optimizer.update(_network.params, grads)

        # Once per epoch, record accuracy on the training set.
        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(train_x, train_label)
            bn_train_acc = bn_network.accuracy(train_x, train_label)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))
            epoch_cnt += 1

    return train_acc_list, bn_train_acc_list
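# The function above calls a `get_batch` helper that is not defined in this
# snippet. Below is a minimal sketch of what it might look like, assuming it
# slices the shuffled index array sequentially and reshuffles when an epoch's
# worth of data has been consumed; the exact wrap-around behavior is an
# assumption, not the original implementation.
def get_batch(train_x, train_label, batch_mask, batch_size, left):
    right = left + batch_size
    if right > batch_mask.shape[0]:
        # End of an epoch: reshuffle the indices and start over.
        np.random.shuffle(batch_mask)
        left, right = 0, batch_size
    idx = batch_mask[left:right]
    return train_x[idx], train_label[idx], right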
def main():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

    # Use only 300 training examples so the network overfits.
    x_train = x_train[:300]
    t_train = t_train[:300]

    max_epochs = 201
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01

    use_dropout = True
    dropout_ratio = 0.2

    train_acc_list = []
    test_acc_list = []

    # Note: `dropout_ration` is the keyword as spelled in MultiLayerNetExtend.
    network = MultiLayerNetExtend(
        input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
        output_size=10, use_dropout=use_dropout, dropout_ration=dropout_ratio)
    optimizer = SGD(lr=learning_rate)

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) +
                  ", test acc:" + str(test_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    markers = {'train': 'o', 'test': 's'}
    x = np.arange(max_epochs)
    plt.plot(x, train_acc_list, marker=markers['train'], label='train', markevery=10)
    plt.plot(x, test_acc_list, marker=markers['test'], label='test', markevery=10)
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.show()
def __train(weight_init_std):
    bn_net = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100],
                                 output_size=10, weight_init_std=weight_init_std, use_batchnorm=True)
    net = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100],
                              output_size=10, weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _net in (bn_net, net):
            grads = _net.gradient(x_batch, t_batch)
            optimizer.update(_net.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = net.accuracy(x_train, t_train)
            bn_train_acc = bn_net.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("EPOCH: {0} | NET_ACC({1}) - BN_NET_ACC({2})".format(
                epoch_cnt, train_acc, bn_train_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
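# A driver like the one below is presumably how `__train` is invoked: sweep a
# range of weight scales and compare the BatchNorm / plain accuracy curves.
# This is a sketch, not the original caller; the module-level globals it sets
# (learning_rate, train_size, batch_size, max_epochs, x_train, t_train) are the
# free variables the function above relies on, and it assumes np, plt, and
# load_mnist are imported as in the surrounding snippets.
if __name__ == '__main__':
    (x_train, t_train), _ = load_mnist(normalize=True)
    x_train, t_train = x_train[:1000], t_train[:1000]
    learning_rate = 0.01
    train_size = x_train.shape[0]
    batch_size = 100
    max_epochs = 20

    # Try 16 weight scales between 10^0 and 10^-4.
    for w in np.logspace(0, -4, num=16):
        acc, bn_acc = __train(w)
        plt.plot(acc, linestyle='--')  # dashed: without BatchNorm
        plt.plot(bn_acc)               # solid: with BatchNorm
    plt.xlabel('epochs')
    plt.ylabel('accuracy')
    plt.show()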
def __trainning(weight_init_std, index):
    epoch_num = 0
    acc_train_list = []
    acc_train_BN_list = []

    input_size = 784
    hidden_size_list = [100] * 5
    output_size = 10
    activation = 'ReLu'
    weight_decay_lambda = 1

    network = MultiLayerNetExtend(input_size=input_size, hidden_size_list=hidden_size_list,
                                  output_size=output_size, activation=activation,
                                  weight_init_std=weight_init_std,
                                  weight_decay_lambda=weight_decay_lambda,
                                  use_BatchNormalization=False, use_weight_decay=False)
    network_BN = MultiLayerNetExtend(input_size=784, hidden_size_list=hidden_size_list,
                                     output_size=output_size, activation=activation,
                                     weight_init_std=weight_init_std,
                                     weight_decay_lambda=weight_decay_lambda,
                                     use_BatchNormalization=True, use_weight_decay=False)

    # Training proceeds in these steps:
    #   1. Randomly draw batch_size samples.
    #   2. Forward pass, backward pass, get the gradients.
    #   3. Update the parameters.
    #   4. Repeat 1-3 until the loop ends.
    for i in range(iter_num):
        print('W' + str(index) + ': iteration ' + str(i))

        # Draw a mini-batch.
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Compute gradients on the mini-batch and update each network.
        for network_ in (network, network_BN):
            grads = network_.gradient(x_batch, t_batch, use_weight_decay=False)
            if network_ is network:
                optimizer.update(params=network_.params, grads=grads)
            else:
                optimizer_BN.update(params=network_.params, grads=grads)

        # Once per epoch, record both networks' accuracy on the batch.
        if i % iter_num_per_epoch == 0:
            network_acc = network.accuracy(x_batch, t_batch)
            network_BN_acc = network_BN.accuracy(x_batch, t_batch)
            acc_train_list.append(network_acc)
            acc_train_BN_list.append(network_BN_acc)
            epoch_num += 1
            if epoch_num >= epoch_cnt_max:
                break

    return acc_train_list, acc_train_BN_list
optimizer = Sgd(learning_rate)
bn_optimizer = Sgd(learning_rate)

# Record how accuracy changes during training.
for i in range(iterations):
    # Pick a random mini-batch (e.g. 128 of the 1,000 training samples).
    mask = np.random.choice(train_size, batch_size)
    x_batch = X_train[mask]
    y_batch = Y_train[mask]

    # Compute gradients in the network without batch normalization.
    gradients = neural_net.gradient(x_batch, y_batch)
    # Update the parameters: weights W and biases b.
    optimizer.update(neural_net.params, gradients)
    # Measure batch accuracy with the updated parameters.
    acc = neural_net.accuracy(x_batch, y_batch)
    # Record the accuracy.
    train_accuracies.append(acc)

    # Do the same in the network that uses batch normalization.
    bn_gradients = bn_neural_net.gradient(x_batch, y_batch)  # compute gradients
    bn_optimizer.update(bn_neural_net.params, bn_gradients)  # update W, b
    bn_acc = bn_neural_net.accuracy(x_batch, y_batch)        # measure accuracy
    bn_train_accuracies.append(bn_acc)                       # record it

    print(f'iteration #{i}: without={acc}, with={bn_acc}')

# Plot the accuracy comparison.
x = np.arange(iterations)
plt.plot(x, train_accuracies, label='without BN')
plt.plot(x, bn_train_accuracies, label='using BN')
mini_batch_size = 100  # number of samples per forward pass
train_size = X_train.shape[0]
iter_per_epoch = int(max(train_size / mini_batch_size, 1))

# Record train/test accuracy at the end of every epoch.
train_accuracies = []
test_accuracies = []

optimizer = Sgd(learning_rate=0.01)  # optimizer

for epoch in range(epochs):
    for i in range(iter_per_epoch):
        x_batch = X_train[(i * mini_batch_size):((i + 1) * mini_batch_size)]
        y_batch = Y_train[(i * mini_batch_size):((i + 1) * mini_batch_size)]
        gradients = neural_net.gradient(x_batch, y_batch)
        optimizer.update(neural_net.params, gradients)

    train_acc = neural_net.accuracy(X_train, Y_train)
    train_accuracies.append(train_acc)
    test_acc = neural_net.accuracy(X_test, Y_test)
    test_accuracies.append(test_acc)
    print(f'epoch #{epoch}: train={train_acc}, test={test_acc}')

x = np.arange(epochs)
plt.plot(x, train_accuracies, label='Train')
plt.plot(x, test_accuracies, label='Test')
plt.legend()
plt.title(f'Weight Decay (lambda={wd_rate})')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()
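# The snippets above assume an `Sgd` class (elsewhere spelled `SGD`) with an
# `update(params, grads)` method. A minimal sketch of that optimizer in the
# style of the surrounding code; treat the exact class name and signature as
# assumptions, since the original definition is not part of this section.
class Sgd:
    """Plain stochastic gradient descent: param -= lr * grad."""

    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate

    def update(self, params, gradients):
        # Update every parameter array in place; grads use the same keys.
        for key in params:
            params[key] -= self.learning_rate * gradients[key]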
class Agent: """ 各エージェントの動作を main からこっちに書き写す形で. """ n = int() AdjG_init = np.zeros((n, n)) #require to be {0 or 1} to all arguments WeiG_init = np.zeros((n, n)) maxdeg = int() train_size = 0 batch_size = 100 # weight graph 作成規則 ===== # wtype = "maximum-degree" wtype = "local-degree" # =========================== def __init__(self, idx, x_train, t_train, x_test, t_test, optimizer, weight_decay_lambda=0.0): self.idx = idx # self.layer = MultiLayerNet(input_size=784, hidden_size_list=[100], output_size=10, # weight_decay_lambda=weight_decay_lambda) self.layer = MultiLayerNetExtend( input_size=784, hidden_size_list=[50, 50, 50], output_size=10, weight_decay_lambda=weight_decay_lambda, use_dropout=False, dropout_ration=0.0, use_batchnorm=False) self.optimizer = optimizer self.rec_param = np.array([{} for i in range(self.n)]) self.x_train = x_train self.t_train = t_train self.x_test = x_test self.t_test = t_test self.z_vec = np.zeros(self.n) self.z_vec[idx] = 1 self.rec_z = np.zeros((self.n, self.n)) self.AdjG = np.zeros(self.n) #require to be {0 or 1} to all arguments self.WeiG = np.zeros(self.n) if np.all(self.WeiG_init == 0): self.makeWeiGraph(self.AdjG_init) else: self.WeiG = self.WeiG_init[self.idx] self.makeAdjGraph() self.train_loss = 0 self.train_acc = 0 self.test_acc = 0 def send(self, k, agent): """sending params to other nodes (return "self.layer.params"): send(agent)""" return (self.layer.params.copy(), self.z_vec.copy()) def receive(self, agent, getparams, getz): """receiving other node's params: receive(agent, new_params)""" self.rec_param[agent] = getparams.copy() self.rec_z[agent] = getz.copy() def selectData(self, train_size, batch_size): batch_mask = np.random.choice(train_size, batch_size) x_batch = self.x_train[batch_mask] t_batch = self.t_train[batch_mask] return x_batch, t_batch def consensus(self): self.weightConsensus() self.subvalConsensus() def weightConsensus(self): for key in self.layer.params.keys(): self.layer.params[key] *= self.WeiG[self.idx] for idn in np.nonzero(self.AdjG)[0]: self.layer.params[ key] += self.WeiG[idn] * self.rec_param[idn][key] def subvalConsensus(self): self.rec_z[self.idx] = self.z_vec self.z_vec = np.dot(self.WeiG, self.rec_z) def update(self, k=1): x_batch, t_batch = self.selectData(self.train_size, self.batch_size) grads = self.layer.gradient(x_batch, t_batch) self.optimizer.update(self.layer.params, grads, self.z_vec[self.idx], k) # self.optimizer.update(self.layer.params, grads) def calcLoss(self): self.train_acc = self.layer.accuracy(self.x_train, self.t_train) self.test_acc = self.layer.accuracy(self.x_test, self.t_test) self.train_loss = self.layer.loss(self.x_train, self.t_train) def makeAdjGraph(self): """make Adjecency Graph""" self.AdjG = self.AdjG_init[self.idx] def makeWeiGraph(self, lAdjG): """make Weight matrix""" if self.n is 1: tmpWeiG = np.ones([1]) else: if self.wtype == "maximum-degree": tmpWeiG = (1 / (self.maxdeg + 1)) * lAdjG[self.idx] tmpWeiG[self.idx] = 1 - np.sum(tmpWeiG) elif self.wtype == "local-degree": ### count degrees ### #degMat = np.kron(np.dot(lAdjG,np.ones([self.n,1])), np.ones([1,self.n])) degMat = np.kron( np.dot(lAdjG, np.ones([self.n, 1])) + 1, np.ones([1, self.n])) ### take max() for each elements ### degMat = np.maximum(degMat, degMat.T) ### divide for each elememts ### tmpAllWeiG = lAdjG / degMat selfDegMat = np.eye(self.n) - np.diag( np.sum(tmpAllWeiG, axis=1)) tmpAllWeiG = tmpAllWeiG + selfDegMat tmpWeiG = tmpAllWeiG[self.idx, :] else: try: raise ValueError("Error: invalid weight-type") except 
ValueError as e: print(e) self.WeiG = tmpWeiG ######## # debugging functions ######## def degub_numericalGrad(self): return self.layer.numerical_gradient(self.x_train[:3], self.t_train[:3]) def debug_backpropGrad(self): return self.layer.gradient(self.x_train[:3], self.t_train[:3]) def debug_consensus(self): params = self.layer.params.copy() self.weightConsensus() self.subvalConsensus() if self.idx == 0: ano_params = self.layer.params.copy() for key in params.keys(): diff = np.average(np.abs(params[key] - ano_params[key])) print(key + ":" + str(diff))
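# Hypothetical driver for the Agent class above (not part of the original):
# each round, every agent broadcasts its parameters, averages them with its
# neighbors' via the weight graph, and then takes a local gradient step. The
# all-to-all exchange below is an illustrative assumption; in practice only
# adjacent agents (per AdjG) would communicate.
def run_round(agents, k):
    # Exchange parameters and subvalue vectors between agents.
    for a in agents:
        for b in agents:
            if a is not b:
                params, z = b.send(k, a.idx)
                a.receive(b.idx, params, z)
    # Consensus step, then a local mini-batch SGD update.
    for a in agents:
        a.consensus()
        a.update(k)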
for i in range(10000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Training phase: take the gradient on the mini-batch.
    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    if i % iter_per_epoch == 0:
        #loss_train = network.loss(x_train, t_train)
        #loss_test = network.loss(x_test, t_test)
        #train_loss_list.append(loss_train)
        #test_loss_list.append(loss_test)
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        epoch_cnt += 1
        print("---{}/{}---".format(epoch_cnt, max_epochs))
        #print("loss_train : " + str(loss_train))
        #print("loss_test : " + str(loss_test))
        print("acc_train : " + str(train_acc))
        print("acc_test : " + str(test_acc))
        if epoch_cnt >= max_epochs:
            break

#plt.plot(train_loss_list, label="train_loss_list")
#plt.plot(test_loss_list, label="test_loss_list")
class Agent: """アルゴリズム構築に必要なAgentの機能 Function: send : 隣接するagentの以下の状態変数を送る Args: layer.param (np.array) : 各層のパラメータ receive : 隣接するagentから状態変数を受け取る Args: layer.param (np.arrar) : 各層のパラメータ optimizer(SGD) : 確率勾配をバックプロパゲーションとランダムシードを用いて実装 For example: self.optimizer = optimizer(SGD(lr)) x_batch, t_batch = self.selectData(self.train_size, self.batch_size) grads = self.layer.gradient(x_batch, t_batch) self.optimizer.update(self.layer.params, grads, k) """ """ 各エージェントの動作を main からこっちに書き写す形で. """ n = int() AdjG_init = np.zeros((n, n)) #require to be {0 or 1} to all arguments WeiG_init = np.zeros((n, n)) maxdeg = int() train_size = 0 batch_size = 100 # weight graph 作成規則 ===== # wtype = "maximum-degree" wtype = "local-degree" # =========================== def __init__(self, idx, x_train, t_train, x_test, t_test, optimizer, weight_decay_lambda=0.0): """各Agentの初期状態変数 Args: idx : Agentのインデックス layer : Agent内のニューラルネットワークの層 optimizer : 最適化を行うアルゴリズムの選択 rec_param : 隣接するエージェントから受け取るパラメータ z_vec : 左の固有ベクトル rec_z : 隣接するエージェントから受け取る左固有ベクトル AdjG : 隣接行列?? WeiG : 重み行列?? """ self.idx = idx # self.layer = MultiLayerNet(input_size=784, hidden_size_list=[100], output_size=10, # weight_decay_lambda=weight_decay_lambda) self.layer = MultiLayerNetExtend( input_size=784, hidden_size_list=[ 500, 400, 300, 300, 200, 200, 100, 100, 100, 100, 100, 50, 50 ], output_size=10, weight_decay_lambda=weight_decay_lambda, use_dropout=True, dropout_ration=0.3, use_batchnorm=True) #dropout_ratio=0.03, use_batchnorm=True, hidden_size_list=[500,400,300,300,200,200,100,100,100,50,50,50] weightdecay=0.01 → 0.9428 #hidden_size_list=[500,400,300,300,200,200,100,100,100,50,50,50], output_size=10,weight_decay_lambda=weight_decay_lambda,use_dropout=True, dropout_ration=0.05, use_batchnorm=True 一番いい #hidden_size_list=[100,100,100,100,100] weightdecay=0.3, dropout_ration=0.3 self.optimizer = optimizer self.rec_param = np.array([{} for i in range(self.n)]) self.send_param = np.array([{} for i in range(self.n)]) #Initialize self.rec_param[self.idx] = self.layer.params.copy() self.send_param[self.idx] = self.layer.params.copy() self.x_train = x_train self.t_train = t_train self.x_test = x_test self.t_test = t_test self.AdjG = np.zeros(self.n) #require to be {0 or 1} to all arguments self.WeiG = np.zeros(self.n) if np.all(self.WeiG_init == 0): self.makeWeiGraph(self.AdjG_init) else: self.WeiG = self.WeiG_init[self.idx] self.makeAdjGraph() self.train_loss = 0 self.train_acc = 0 self.test_acc = 0 #山下さんのアルゴリズム構築に必要な関数 # def send(self, k, agent): # """sending params to other nodes (return "self.layer.params"): send(agent)""" # return (self.layer.params.copy(), self.z_vec.copy()) # def receive(self, agent, getparams, getz): # """receiving other node's params: receive(agent, new_params)""" # self.rec_param[agent] = getparams.copy() # self.rec_z[agent] = getz.copy() def send(self, k, agent): """sending params to other nodes (return "self.layer.params"): send(agent)""" self.send_param[self.idx] = self.layer.params.copy() return self.layer.params.copy() def receive(self, agent, getparams): """receiving other node's params: receive(agent, new_params)""" for key in getparams.keys(): self.rec_param[agent][key] = getparams[key].copy() def selectData(self, train_size, batch_size): batch_mask = np.random.choice(train_size, batch_size) x_batch = self.x_train[batch_mask] t_batch = self.t_train[batch_mask] return x_batch, t_batch def consensus(self): self.weightConsensus() # self.subvalConsensus() def weightConsensus(self): for key in self.layer.params.keys(): 
self.layer.params[key] *= self.WeiG[self.idx] for idn in np.nonzero(self.AdjG)[0]: self.layer.params[ key] += self.WeiG[idn] * self.rec_param[idn][key] # def subvalConsensus(self): # self.rec_z[self.idx] = self.z_vec # self.z_vec = np.dot(self.WeiG, self.rec_z) def update(self, k=1): x_batch, t_batch = self.selectData(self.train_size, self.batch_size) grads = self.layer.gradient(x_batch, t_batch) # self.optimizer.update(self.layer.params, grads, self.z_vec[self.idx], k) self.send_param[self.idx], self.rec_param[ self.idx] = self.optimizer.update(self.layer.params, grads, self.rec_param[self.idx], self.send_param[self.idx], self.WeiG[self.idx], k) def calcLoss(self): self.train_acc = self.layer.accuracy(self.x_train, self.t_train) self.test_acc = self.layer.accuracy(self.x_test, self.t_test) self.train_loss = self.layer.loss(self.x_train, self.t_train) def makeAdjGraph(self): """make Adjecency Graph""" self.AdjG = self.AdjG_init[self.idx] def makeWeiGraph(self, lAdjG): """make Weight matrix 2020/01/28 山下さんは有効グラフを作成している.無向グラフに変更("maximum-degree"の方のみ) Args: tmpWeiG (np.array) : 一次的な重み行列 """ if self.n is 1: tmpWeiG = np.ones([1]) else: if self.wtype == "maximum-degree": tmpWeiG = (1 / (self.maxdeg + 1)) * lAdjG[self.idx] tmpWeiG[self.idx] = 1 - np.sum(tmpWeiG) elif self.wtype == "local-degree": ### count degrees ### #degMat = np.kron(np.dot(lAdjG,np.ones([self.n,1])), np.ones([1,self.n])) degMat = np.kron( np.dot(lAdjG, np.ones([self.n, 1])) + 1, np.ones([1, self.n])) ### take max() for each elements ### degMat = np.maximum(degMat, degMat.T) ### divide for each elememts ### tmpAllWeiG = lAdjG / degMat selfDegMat = np.eye(self.n) - np.diag( np.sum(tmpAllWeiG, axis=1)) tmpAllWeiG = tmpAllWeiG + selfDegMat tmpWeiG = tmpAllWeiG[self.idx, :] else: try: raise ValueError("Error: invalid weight-type") except ValueError as e: print(e) self.WeiG = tmpWeiG ######## # debugging functions ######## def degub_numericalGrad(self): return self.layer.numerical_gradient(self.x_train[:3], self.t_train[:3]) def debug_backpropGrad(self): return self.layer.gradient(self.x_train[:3], self.t_train[:3]) def debug_consensus(self): params = self.layer.params.copy() self.weightConsensus() self.subvalConsensus() if self.idx == 0: ano_params = self.layer.params.copy() for key in params.keys(): diff = np.average(np.abs(params[key] - ano_params[key])) print(key + ":" + str(diff))
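# Quick sanity check (illustrative, not from the original): the "local-degree"
# rule in makeWeiGraph builds Metropolis-style weights, which should yield a
# symmetric, doubly stochastic matrix. A standalone sketch of the same
# computation, assuming np is imported as in the surrounding code:
def local_degree_weights(adj):
    n = adj.shape[0]
    deg = adj.sum(axis=1) + 1                 # degree of each node (+ self)
    deg_mat = np.maximum.outer(deg, deg)      # pairwise max of degrees
    w = adj / deg_mat                         # off-diagonal weights
    w += np.eye(n) - np.diag(w.sum(axis=1))   # self-weights make rows sum to 1
    return w

# Example on a 3-node path graph 0-1-2:
# adj = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]], dtype=float)
# local_degree_weights(adj).sum(axis=1)  # -> array([1., 1., 1.])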
# epoch = 0
for i in range(iterations):
    # Pick a random mini-batch (e.g. 128 of the 1,000 training samples).
    mask = np.random.choice(train_size, batch_size)
    x_batch = X_train[mask]
    y_batch = Y_train[mask]

    # Batch normalization comparison:
    # compute gradients in the network without batch normalization,
    gradients = neural_net.gradient(x_batch, y_batch)
    # update the parameters (W and b),
    optimizer.update(neural_net.params, gradients)
    # measure batch accuracy with the updated parameters,
    accuracy = neural_net.accuracy(x_batch, y_batch)
    # and record it.
    neural_accuracy.append(accuracy)

    # Do the same in the network that uses batch normalization.
    bn_gradients = bn_neural_net.gradient(x_batch, y_batch)
    bn_optimizer.update(bn_neural_net.params, bn_gradients)
    bn_accuracy = bn_neural_net.accuracy(x_batch, y_batch)
    bn_neural_accuracy.append(bn_accuracy)

    print(f'iteration #{i}: without = {neural_accuracy[i]}, with = {bn_neural_accuracy[i]}')
batch_size = 128
learning_rate = 0.01

neural_optimizer = Sgd()
bn_neural_optimizer = Sgd()

neural_acc_list = []
bn_neural_acc_list = []

for i in range(iterator):
    mask = np.random.choice(X_train.shape[0], batch_size)
    X_batch = X_train[mask]
    Y_batch = Y_train[mask]

    for network in (neural_net, bn_neural_net):
        gradient = network.gradient(X_batch, Y_batch)
        if network is neural_net:
            neural_optimizer.update(network.params, gradient)
            neural_acc = neural_net.accuracy(X_batch, Y_batch)
            neural_acc_list.append(neural_acc)
        else:
            bn_neural_optimizer.update(network.params, gradient)
            bn_neural_acc = bn_neural_net.accuracy(X_batch, Y_batch)
            bn_neural_acc_list.append(bn_neural_acc)

    print(f'===== training step {i} =====')
    print('Without BatchNorm', neural_acc_list[-1])
    print('BatchNorm', bn_neural_acc_list[-1])

    # Pre-refactor version, kept for reference:
    # gradients = neural_net.gradient(X_batch, Y_batch)
    # neural_optimizer.update(neural_net.params, gradients)
    # neural_acc = neural_net.accuracy(X_batch, Y_batch)
    # neural_acc_list.append(neural_acc)
    #
    # bn_gradients = bn_neural_net.gradient(X_batch, Y_batch)
# Randomly draw batch_size samples.
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]

# Forward pass, backward pass, get the gradients, and update both networks.
for _network in (network, network_weight_decay):
    grads = _network.gradient(x_batch, t_batch,
                              use_weight_decay=_network.use_weight_decay)
    optimizer.update(_network.params, grads)

if i % iter_num_per_epoch == 0:
    epoch_num += 1

    train_acc_network = network.accuracy(x_train, t_train)
    train_acc_list.append(train_acc_network)
    test_acc_network = network.accuracy(x_test, t_test)
    test_acc_list.append(test_acc_network)

    train_acc_network_weight_decay = network_weight_decay.accuracy(x_train, t_train)
    train_acc_weight_decay_list.append(train_acc_network_weight_decay)
    test_acc_network_weight_decay = network_weight_decay.accuracy(x_test, t_test)
    test_acc_weight_decay_list.append(test_acc_network_weight_decay)

    if epoch_num >= epoch_cnt_max:
        break
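# The four accuracy lists built above are presumably plotted afterwards to
# compare training with and without weight decay. A sketch of such a plot;
# the figure styling is an assumption, not the original code, and it assumes
# np and plt are imported as in the surrounding snippets.
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train (no weight decay)')
plt.plot(x, test_acc_list, label='test (no weight decay)')
plt.plot(x, train_acc_weight_decay_list, label='train (weight decay)')
plt.plot(x, test_acc_weight_decay_list, label='test (weight decay)')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(loc='lower right')
plt.show()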