def train(network, x_train, y_train, x_test, y_test,
          iter_times=10000, hidden_size=10, batch_size=100, lr=0.1):
    nn = network
    optimizers = {
        'SGD': SGD(lr),
        'Momentum': Momentum(lr),
        'Nesterov': Nesterov(lr),
        'AdaGrad': AdaGrad(lr),
        'RMSProp': RMSProp(0.02),  # lr == 0.1 may make loss += ln(eps), eps == 1e-15
        'Adam': Adam(0.005)
    }
    opt = optimizers['Adam']

    for i in range(iter_times):
        if i % max(x_train.shape[0] // batch_size, 1) == 0:
            print('{:.1%}'.format(i / iter_times))
        batch_mask = np.random.choice(x_train.shape[0], batch_size)
        x_batch, y_batch = x_train[batch_mask], y_train[batch_mask]
        grads = nn.grad(x_batch, y_batch)
        opt.update(nn.params, grads)

    print('Train acc: {:.4} Test acc: {:.4}'.format(
        nn.accuracy(x_train, y_train), nn.accuracy(x_test, y_test)))
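# The SGD/Momentum/AdaGrad/Adam objects used by train() all expose the same
# update(params, grads) interface (they come from common/optimizer.py). A minimal sketch of
# SGD and Momentum with that interface follows; the update rules are the standard ones and the
# default hyperparameters are assumptions, not necessarily identical to the repo's code.
import numpy as np

class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        # Vanilla gradient descent: W <- W - lr * dW
        for key in params.keys():
            params[key] -= self.lr * grads[key]

class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None  # velocity, one entry per parameter

    def update(self, params, grads):
        # Velocity accumulates past gradients: v <- momentum * v - lr * dW; W <- W + v
        if self.v is None:
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]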
def df(x, y):
    return x / 10.0, 2.0 * y

init_pos = (-7.0, 2.0)
params = {}
params['x'], params['y'] = init_pos[0], init_pos[1]
grads = {}
grads['x'], grads['y'] = 0, 0

optimizers = OrderedDict()
optimizers["SGD"] = SGD(lr=0.95)
optimizers["Momentum"] = Momentum(lr=0.1)
optimizers["AdaGrad"] = AdaGrad(lr=1.5)
optimizers["Adam"] = Adam(lr=0.3)

idx = 1
for key in optimizers:
    optimizer = optimizers[key]
    x_history = []
    y_history = []
    params['x'], params['y'] = init_pos[0], init_pos[1]

    for i in range(30):
        x_history.append(params['x'])
        y_history.append(params['y'])
        grads['x'], grads['y'] = df(params['x'], params['y'])
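        # The snippet above stops right after the gradient of f(x, y) = x**2 / 20 + y**2 is
        # computed. A minimal sketch of the remaining steps follows: apply the update, then draw
        # each optimizer's trajectory over the contour of f. The subplot layout via idx and the
        # matplotlib styling are assumptions, and numpy (np) / matplotlib.pyplot (plt) are
        # assumed to be imported.
        optimizer.update(params, grads)

    # Plot the path this optimizer took over the contour of f
    x = np.arange(-10, 10, 0.01)
    y = np.arange(-5, 5, 0.01)
    X, Y = np.meshgrid(x, y)
    Z = X**2 / 20.0 + Y**2
    plt.subplot(2, 2, idx)
    idx += 1
    plt.plot(x_history, y_history, 'o-', color='red')
    plt.contour(X, Y, Z)
    plt.plot(0, 0, '+')  # the minimum of f
    plt.title(key)
    plt.xlabel('x')
    plt.ylabel('y')
plt.show()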
learning_rate = 0.1
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
iter_per_epoch = max(train_size / batch_size, 1)

# optimizer = SGD()  # stochastic gradient descent
optimizer = AdaGrad()  # AdaGrad

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.gradient(x_batch, t_batch)

    # Manual SGD update, superseded by the optimizer object created above:
    # for key in ('W1', 'b1', 'W2', 'b2'):
    #     network.params[key] -= learning_rate * grad[key]
    optimizer.update(network.params, grad)

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)
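# AdaGrad above is the common/optimizer.py class with the update(params, grads) interface.
# A minimal sketch of the standard AdaGrad rule follows; the lr default and the 1e-7 term that
# guards against division by zero are assumptions.
import numpy as np

class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None  # per-parameter sum of squared gradients

    def update(self, params, grads):
        if self.h is None:
            self.h = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            # Accumulate squared gradients and shrink the step where gradients have been large
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)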
if __name__ == '__main__':
    import numpy as np
    from dataset.mnist import load_mnist
    from common.optimizer import SGD, Momentum, AdaGrad, Adam
    from common.multi_layer_net import MultiLayerNet

    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

    # MulLayerNet is not imported above; it is assumed to be defined elsewhere in this script
    # and is trained side by side with the book's MultiLayerNet under identical settings.
    multi_layer_net = MultiLayerNet(784, output_size=10, hidden_size_list=[100, 100, 100, 100])
    mul_layer_net = MulLayerNet(784, output_size=10, hidden_size_list=[100, 100, 100, 100])

    max_iterations = 2000
    train_size = x_train.shape[0]
    batch_size = 128

    optimizer_mul = AdaGrad()
    optimizer_multi = AdaGrad()

    for i in range(max_iterations):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads_mul = mul_layer_net.gradient(x_batch, t_batch)
        grads_multi = multi_layer_net.gradient(x_batch, t_batch)

        optimizer_mul.update(mul_layer_net.params, grads_mul)
        optimizer_multi.update(multi_layer_net.params, grads_multi)

        loss_mul = mul_layer_net.loss(x_batch, t_batch)
        loss_multi = multi_layer_net.loss(x_batch, t_batch)
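        # The loop above computes both losses every iteration but never reports them. A minimal
        # sketch of periodic logging follows; the 100-iteration interval is an arbitrary choice,
        # not something taken from the original script.
        if i % 100 == 0:
            print('iter {:4d}  MulLayerNet loss: {:.4f}  MultiLayerNet loss: {:.4f}'.format(
                i, loss_mul, loss_multi))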
train_loss_list = []
train_acc_list = []
test_acc_list = []
ratio_list = []
train_size = t_train.shape[0]

# sigmoid works better than ReLU for DPL
network = DPLMultiLayerNet(input_size, hidden_size, output_size, batch_size,
                           activation='sigmoid', dpl=DPL)
optimizer = AdaGrad()  # very good
# optimizer = Adam()
# optimizer = SGD()

def set_batch():
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    network.set_batch(x_batch, t_batch)

epoch_cnt = 0
max_epochs = 201

for i in range(iters_num):
    # gradient
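# Adam is listed above as an alternative optimizer. A minimal sketch of the standard Adam rule
# with the same update(params, grads) interface follows; the defaults (lr=0.001, beta1=0.9,
# beta2=0.999) and the 1e-7 stabilizer are the usual textbook choices, not necessarily what
# common/optimizer.py uses.
import numpy as np

class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.t = 0     # time step, used for bias correction
        self.m = None  # first-moment (mean) estimate
        self.v = None  # second-moment (uncentered variance) estimate

    def update(self, params, grads):
        if self.m is None:
            self.m = {key: np.zeros_like(val) for key, val in params.items()}
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        self.t += 1
        for key in params.keys():
            self.m[key] = self.beta1 * self.m[key] + (1 - self.beta1) * grads[key]
            self.v[key] = self.beta2 * self.v[key] + (1 - self.beta2) * (grads[key] ** 2)
            # Bias-corrected moment estimates
            m_hat = self.m[key] / (1 - self.beta1 ** self.t)
            v_hat = self.v[key] / (1 - self.beta2 ** self.t)
            params[key] -= self.lr * m_hat / (np.sqrt(v_hat) + 1e-7)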
import numpy as np
from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD, Momentum, AdaGrad, Adam

# 0: Load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: Set up the experiment ==========
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
# optimizers['RMSprop'] = RMSprop()

networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(
        input_size=784, hidden_size_list=[100, 100, 100, 100], output_size=10)
    train_loss[key] = []

# 2: Start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
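    # The training loop above is cut off after x_batch. A minimal sketch of the remaining steps
    # follows: feed the same mini-batch to every optimizer's network, record the losses, and plot
    # the smoothed curves with the smooth_curve helper imported above. The logging interval and
    # the matplotlib styling are assumptions.
    t_batch = t_train[batch_mask]

    for key in optimizers.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizers[key].update(networks[key].params, grads)
        train_loss[key].append(networks[key].loss(x_batch, t_batch))

    if i % 100 == 0:
        print('========== iteration: {} =========='.format(i))
        for key in optimizers.keys():
            print('{}: {:.4f}'.format(key, train_loss[key][-1]))

# 3: Plot the training loss ==========
import matplotlib.pyplot as plt

for key in optimizers.keys():
    plt.plot(np.arange(max_iterations), smooth_curve(train_loss[key]), label=key)
plt.xlabel('iterations')
plt.ylabel('loss')
plt.ylim(0, 1)
plt.legend()
plt.show()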