Exemplo n.º 1
0
def init_adam_states():
    """Create the zero-initialised Adam state for the weight and the bias.

    Reads the module-level ``features`` array to size the weight state:
    weight buffers have shape ``(features.shape[1], 1)`` and bias buffers
    have shape ``(1,)``.

    Returns:
        ``((v_w, s_w), (v_b, s_b))`` -- one ``(v, s)`` pair per parameter,
        weight first and bias second, which is the layout ``adam`` unpacks.
    """
    n_inputs = features.shape[1]
    v_w, s_w = nd.zeros((n_inputs, 1)), nd.zeros((n_inputs, 1))
    v_b, s_b = nd.zeros(1), nd.zeros(1)
    return ((v_w, s_w), (v_b, s_b))


# Backward-compatible alias: the function was first written under this
# misspelled name, and existing callers still use it.
inin_adam_states = init_adam_states


def adam(params, states, hyperparams):
    """Apply one Adam update to every parameter, in place.

    Args:
        params: iterable of arrays exposing a ``.grad`` attribute.
        states: iterable of ``(v, s)`` pairs aligned with ``params``; both
            buffers are overwritten in place with the new running averages
            of the gradient and of the squared gradient.
        hyperparams: dict with ``'lr'`` (learning rate) and ``'t'`` (the
            current step count, used for bias correction). ``'t'`` is
            incremented by one after all parameters have been updated.
    """
    beta1, beta2, eps = 0.9, 0.999, 1e-6
    for param, (first_moment, second_moment) in zip(params, states):
        grad = param.grad
        first_moment[:] = beta1 * first_moment + (1 - beta1) * grad
        second_moment[:] = beta2 * second_moment + (1 - beta2) * grad.square()
        # Bias correction: both averages start at zero, so rescale them by
        # the weight accumulated over the first t steps.
        step = hyperparams['t']
        first_hat = first_moment / (1 - beta1**step)
        second_hat = second_moment / (1 - beta2**step)
        param[:] -= (hyperparams['lr'] * first_hat) / (second_hat.sqrt() + eps)
    hyperparams['t'] += 1


d2l.plt.figure(figsize=(15, 5))  # set the figure size
# Do not forget the parentheses on inin_adam_states(): the state tuple returned
# by the call must be passed in, not the function object itself.
d2l.train_ch7(adam, inin_adam_states(), {'lr': 0.01, 't': 1}, features, labels)

# Concise implementation
d2l.plt.figure(figsize=(15, 5))  # set the figure size
# The only hyperparameter passed to 'adam' here is learning_rate.
d2l.train_gluon_ch7('adam', {'learning_rate': 0.01}, features, labels)

d2l.plt.show()
Exemplo n.º 2
0
    return x1-v1, x2-v2, v1, v2

# NOTE(review): eta and gamma are presumably read as globals by momentum_2d,
# which is defined outside this view -- confirm.
eta, gamma = 0.4, 0.5
d2l.show_trace_2d(f_2d, d2l.train_2d(momentum_2d))

# Repeat the trace with a different eta (gamma unchanged).
eta =0.6
d2l.show_trace_2d(f_2d, d2l.train_2d(momentum_2d))
# Exponentially weighted moving average - derivation
# Understanding the momentum method through the exponentially weighted moving average
# 7.4.3 - Implementation from scratch
features, labels = d2l.get_data_ch7()
def init_momentum_states():
    """Create zero-initialised velocity buffers for the weight and the bias.

    Reads the module-level ``features`` array to size the weight buffer.

    Returns:
        ``(v_w, v_b)`` with shapes ``(features.shape[1], 1)`` and ``(1,)``.
    """
    n_inputs = features.shape[1]
    return (nd.zeros((n_inputs, 1)), nd.zeros(1))

def sgd_momentum(params, states, hyperparams):
    """Apply one SGD-with-momentum update to every parameter, in place.

    Args:
        params: iterable of arrays exposing a ``.grad`` attribute.
        states: iterable of velocity buffers aligned with ``params``; each
            is overwritten in place with the new velocity.
        hyperparams: dict with ``'momentum'`` (velocity decay factor) and
            ``'lr'`` (learning rate).
    """
    for param, velocity in zip(params, states):
        carried_over = hyperparams['momentum'] * velocity
        scaled_grad = hyperparams['lr'] * param.grad
        velocity[:] = carried_over + scaled_grad
        # The parameter moves by the freshly updated velocity.
        param[:] -= velocity

# Train with the from-scratch momentum optimizer under three settings.
d2l.train_ch7(sgd_momentum, init_momentum_states(),
              {'lr':0.02, 'momentum':0.5}, features, labels)
# Same learning rate, larger momentum coefficient.
d2l.train_ch7(sgd_momentum, init_momentum_states(),
              {'lr':0.02, 'momentum':0.9}, features, labels)
# Smaller learning rate with the momentum coefficient back at 0.5.
d2l.train_ch7(sgd_momentum, init_momentum_states(),
              {'lr':0.004, 'momentum':0.5}, features, labels)
# 7.4.4 - Concise implementation
d2l.train_gluon_ch7('sgd', {'learning_rate':0.004, 'momentum':0.9}, features, labels)

Exemplo n.º 3
0
# NOTE(review): eta and gamma are presumably read as globals by rmsprop_2d,
# which is defined outside this view -- confirm.
eta, gamma = 0.4, 0.9
d2l.show_trace_2d(f_2d, d2l.train_2d(rmsprop_2d))

# 7.6.2 - Implementation from scratch
features, labels = d2l.get_data_ch7()


def init_rmsprop_states():
    """Create zero-initialised squared-gradient buffers for weight and bias.

    Reads the module-level ``features`` array to size the weight buffer.

    Returns:
        ``(s_w, s_b)`` with shapes ``(features.shape[1], 1)`` and ``(1,)``.
    """
    n_inputs = features.shape[1]
    return (nd.zeros((n_inputs, 1)), nd.zeros(1))


def rmsprop(params, states, hyperparams):
    """Apply one RMSProp update to every parameter, in place.

    Args:
        params: iterable of arrays exposing a ``.grad`` attribute.
        states: iterable of buffers aligned with ``params``; each is
            overwritten in place with the new running average of the
            squared gradient.
        hyperparams: dict with ``'gamma'`` (decay factor of the running
            average) and ``'lr'`` (learning rate).
    """
    eps = 1e-6
    gamma = hyperparams['gamma']
    for param, sq_avg in zip(params, states):
        grad = param.grad
        sq_avg[:] = gamma * sq_avg + (1 - gamma) * grad.square()
        # eps is added inside the square root to keep the divisor non-zero.
        scaled_grad = hyperparams['lr'] * grad
        param[:] -= scaled_grad / (sq_avg + eps).sqrt()


# Train with the from-scratch RMSProp optimizer defined in this file.
d2l.train_ch7(rmsprop, init_rmsprop_states(), {
    'lr': 0.01,
    'gamma': 0.9
}, features, labels)
# 7.6.3 - Concise implementation
# Here the decay factor is passed under the key 'gamma1'.
d2l.train_gluon_ch7('rmsprop', {
    'learning_rate': 0.01,
    'gamma1': 0.9
}, features, labels)
Exemplo n.º 4
0
from mxnet import nd

# Load the training data shared by the from-scratch and concise runs below.
features, labels = d2l.get_data_ch7()


def init_adadelta_states():
    """Create the zero-initialised AdaDelta state for the weight and the bias.

    Reads the module-level ``features`` array to size the weight buffers.

    Returns:
        ``((s_w, delta_w), (s_b, delta_b))`` -- one ``(s, delta)`` pair per
        parameter, weight first and bias second, which is the layout
        ``adadelta`` unpacks.
    """
    weight_shape = (features.shape[1], 1)
    s_w = nd.zeros(weight_shape)
    s_b = nd.zeros(1)
    delta_w = nd.zeros(weight_shape)
    delta_b = nd.zeros(1)
    return ((s_w, delta_w), (s_b, delta_b))


def adadelta(params, states, hyperparams):
    """Apply one AdaDelta update to every parameter, in place.

    Args:
        params: iterable of arrays exposing a ``.grad`` attribute.
        states: iterable of ``(s, delta)`` pairs aligned with ``params``;
            ``s`` is overwritten with the running average of the squared
            gradient and ``delta`` with the running average of the squared
            update.
        hyperparams: dict with ``'rho'``, the decay factor used for both
            running averages. No learning rate is read.
    """
    eps = 1e-5
    rho = hyperparams['rho']
    for param, (sq_grad_avg, sq_update_avg) in zip(params, states):
        grad = param.grad
        sq_grad_avg[:] = rho * sq_grad_avg + (1 - rho) * grad.square()
        # The step is scaled by the ratio of the two running RMS values;
        # sq_update_avg still holds the previous step's value at this point.
        rms_ratio = (sq_update_avg + eps).sqrt() / (sq_grad_avg + eps).sqrt()
        update = rms_ratio * grad
        param[:] -= update
        sq_update_avg[:] = rho * sq_update_avg + (1 - rho) * update * update


# In[4]:

# Train with the from-scratch AdaDelta optimizer; rho is its only hyperparameter.
d2l.train_ch7(adadelta, init_adadelta_states(), {'rho': 0.9}, features, labels)

# In[6]:

# Concise implementation
d2l.train_gluon_ch7('adadelta', {'rho': 0.9}, features, labels)

# In[ ]: