Example #1
import numpy as np
# SGD, Momentum, Nesterov, AdaGrad, RMSProp and Adam are assumed to be importable
# from the project's optimizer module (e.g. common.optimizer).

def train(network,
          x_train,
          y_train,
          x_test,
          y_test,
          iter_times=10000,
          hidden_size=10,
          batch_size=100,
          lr=0.1):
    nn = network
    optimizers = {
        'SGD': SGD(lr),
        'Momentum': Momentum(lr),
        'Nesterov': Nesterov(lr),
        'AdaGrad': AdaGrad(lr),
        'RMSProp': RMSProp(0.02),  # lr == 0.1 can blow the loss up with ln(eps) terms (eps == 1e-15)
        'Adam': Adam(0.005)
    }
    opt = optimizers['Adam']

    for i in range(iter_times):
        if i % max(x_train.shape[0] // batch_size, 1) == 0:
            print('{:.1%}'.format(i / iter_times))
        batch_mask = np.random.choice(x_train.shape[0], batch_size)
        x_batch, y_batch = x_train[batch_mask], y_train[batch_mask]
        grads = nn.grad(x_batch, y_batch)
        opt.update(nn.params, grads)

    print('Train acc: {:.4}  Test acc: {:.4}'.format(
        nn.accuracy(x_train, y_train), nn.accuracy(x_test, y_test)))
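All of these optimizer objects share the same duck-typed interface: update(params, grads) mutates the parameter dict in place. A minimal sketch of that assumed interface, using plain SGD (the concrete classes used above may differ):

class SGD:
    """Plain stochastic gradient descent: W <- W - lr * dW."""

    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        # Subtract the scaled gradient from every parameter, in place.
        for key in params.keys():
            params[key] -= self.lr * grads[key]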
Example #2
from collections import OrderedDict
# SGD, Momentum, AdaGrad and Adam are assumed to come from the project's optimizer module.

def df(x, y):
    return x / 10.0, 2.0*y

init_pos = (-7.0, 2.0)
params = {}
params['x'], params['y'] = init_pos[0], init_pos[1]
grads = {}
grads['x'], grads['y'] = 0, 0


optimizers = OrderedDict()
optimizers["SGD"] = SGD(lr=0.95)
optimizers["Momentum"] = Momentum(lr=0.1)
optimizers["AdaGrad"] = AdaGrad(lr=1.5)
optimizers["Adam"] = Adam(lr=0.3)

idx = 1

for key in optimizers:
    optimizer = optimizers[key]
    x_history = []
    y_history = []
    params['x'], params['y'] = init_pos[0], init_pos[1]
    
    for i in range(30):
        x_history.append(params['x'])
        y_history.append(params['y'])
        
        grads['x'], grads['y'] = df(params['x'], params['y'])
        optimizer.update(params, grads)
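
    # --- sketch (not in the original): draw each optimizer's trajectory on the contours of
    # f(x, y) = x**2 / 20 + y**2, one subplot per optimizer; `idx` above is the subplot
    # counter. Assumes `import numpy as np` and `import matplotlib.pyplot as plt` at the top. ---
    x = np.arange(-10, 10, 0.01)
    y = np.arange(-5, 5, 0.01)
    X, Y = np.meshgrid(x, y)
    Z = X**2 / 20.0 + Y**2

    plt.subplot(2, 2, idx)
    idx += 1
    plt.plot(x_history, y_history, 'o-', color='red')
    plt.contour(X, Y, Z)
    plt.title(key)
    plt.xlabel('x')
    plt.ylabel('y')

plt.show()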
Example #3
import numpy as np
# This snippet assumes the usual MNIST setup from the same project: x_train, t_train,
# x_test, t_test loaded via load_mnist, plus train_size, batch_size, iters_num, the
# train_loss_list/train_acc_list/test_acc_list lists, and the TwoLayerNet, SGD and
# AdaGrad classes, all defined or imported beforehand.

learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Create the optimizer once, outside the loop, so its internal state
# (e.g. AdaGrad's accumulated squared-gradient history) persists across iterations.
# optimizer = SGD(lr=learning_rate)  # stochastic gradient descent
optimizer = AdaGrad()

iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.gradient(x_batch, t_batch)

    # Manual update (plain SGD), kept for reference; the optimizer object above replaces it:
    # for key in ('W1', 'b1', 'W2', 'b2'):
    #     network.params[key] -= learning_rate * grad[key]
    optimizer.update(network.params, grad)

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)
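AdaGrad is stateful: it accumulates squared gradients in h and scales each parameter's step by 1/sqrt(h), which is why the optimizer has to be constructed once before the loop rather than inside it. A minimal sketch of the assumed implementation:

import numpy as np

class AdaGrad:
    """Per-parameter adaptive learning rate: h += dW*dW; W -= lr * dW / sqrt(h)."""

    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            # Lazily allocate one accumulator per parameter on the first call.
            self.h = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)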
Example #4
import numpy as np

if __name__ == '__main__':
    from dataset.mnist import load_mnist
    from common.optimizer import SGD, Momentum, AdaGrad, Adam
    from common.multi_layer_net import MultiLayerNet
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
    multi_layer_net = MultiLayerNet(784,
                                    output_size=10,
                                    hidden_size_list=[100, 100, 100, 100])
    # MulLayerNet is presumably the author's own reimplementation being compared
    # against the book's MultiLayerNet; its import is not shown here.
    mul_layer_net = MulLayerNet(784,
                                output_size=10,
                                hidden_size_list=[100, 100, 100, 100])

    max_iterations = 2000
    train_size = x_train.shape[0]
    batch_size = 128
    optimizer_mul = AdaGrad()
    optimizer_multi = AdaGrad()

    for i in range(max_iterations):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads_mul = mul_layer_net.gradient(x_batch, t_batch)
        grads_multi = multi_layer_net.gradient(x_batch, t_batch)

        optimizer_mul.update(mul_layer_net.params, grads_mul)
        optimizer_multi.update(multi_layer_net.params, grads_multi)

        loss_mul = mul_layer_net.loss(x_batch, t_batch)
        loss_multi = multi_layer_net.loss(x_batch, t_batch)
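
        # --- sketch (not in the original): report both losses periodically so the two
        # implementations can be compared as training progresses ---
        if i % 100 == 0:
            print('iter {}: MulLayerNet loss {:.4f} | MultiLayerNet loss {:.4f}'.format(
                i, loss_mul, loss_multi))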
Example #5
import numpy as np
# x_train, t_train, input_size, hidden_size, output_size, batch_size, DPL, iters_num
# and the DPLMultiLayerNet / SGD / AdaGrad / Adam classes are assumed to be defined
# or imported elsewhere.
train_loss_list = []
train_acc_list = []
test_acc_list = []
ratio_list = []

train_size = t_train.shape[0]

# sigmoid works better than ReLU for DPL here

network = DPLMultiLayerNet(input_size,
                           hidden_size,
                           output_size,
                           batch_size,
                           activation='sigmoid',
                           dpl=DPL)
optimizer = AdaGrad()  # very good
#optimizer = Adam()
#optimizer = SGD()


def set_batch():
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    network.set_batch(x_batch, t_batch)


epoch_cnt = 0
max_epochs = 201
for i in range(iters_num):
    # gradient
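    # --- sketch of the missing loop body (assumptions: DPLMultiLayerNet exposes gradient()
    # and loss() that use the mini-batch stored via set_batch(); the epoch/accuracy
    # bookkeeping implied by epoch_cnt and max_epochs is omitted here) ---
    set_batch()
    grads = network.gradient()
    optimizer.update(network.params, grads)
    train_loss_list.append(network.loss())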

import numpy as np
from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD, Momentum, AdaGrad, Adam

# 0: Load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: Experiment setup ==========
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
#optimizers['RMSprop'] = RMSprop()

networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10)
    train_loss[key] = []

# 2: Start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # One training step per optimizer: compute gradients, update, and record the loss.
    for key in optimizers.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizers[key].update(networks[key].params, grads)
        train_loss[key].append(networks[key].loss(x_batch, t_batch))

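# --- sketch (not in the original): plot the smoothed loss curve of each optimizer,
# which is what the smooth_curve import above is for ---
import matplotlib.pyplot as plt

markers = {'SGD': 'o', 'Momentum': 'x', 'AdaGrad': 's', 'Adam': 'D'}
for key in optimizers.keys():
    y = smooth_curve(train_loss[key])
    plt.plot(np.arange(len(y)), y, marker=markers[key], markevery=100, label=key)
plt.xlabel('iterations')
plt.ylabel('loss')
plt.legend()
plt.show()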