def test_sgd_optimizer_no_momentum(): params = [np.zeros(shape) for shape in shapes] for lr in [10 ** i for i in range(-3, 4)]: optimizer = SGDOptimizer(params, lr, momentum=0, nesterov=False) grads = [np.random.random(shape) for shape in shapes] expected = [param - lr * grad for param, grad in zip(params, grads)] optimizer.update_params(grads) for exp, param in zip(expected, optimizer.params): assert_array_equal(exp, param)
def test_sgd_optimizer_momentum(): params = [np.zeros(shape) for shape in shapes] lr = 0.1 for momentum in np.arange(0.5, 0.9, 0.1): optimizer = SGDOptimizer(params, lr, momentum=momentum, nesterov=False) velocities = [np.random.random(shape) for shape in shapes] optimizer.velocities = velocities grads = [np.random.random(shape) for shape in shapes] updates = [momentum * velocity - lr * grad for velocity, grad in zip(velocities, grads)] expected = [param + update for param, update in zip(params, updates)] optimizer.update_params(grads) for exp, param in zip(expected, optimizer.params): assert_array_equal(exp, param)
def test_sgd_optimizer_nesterovs_momentum(): params = [np.zeros(shape) for shape in shapes] lr = 0.1 rng = np.random.RandomState(0) for momentum in np.arange(0.5, 0.9, 0.1): optimizer = SGDOptimizer(params, lr, momentum=momentum, nesterov=True) velocities = [rng.random_sample(shape) for shape in shapes] optimizer.velocities = velocities grads = [rng.random_sample(shape) for shape in shapes] updates = [ momentum * velocity - lr * grad for velocity, grad in zip(velocities, grads) ] updates = [ momentum * update - lr * grad for update, grad in zip(updates, grads) ] expected = [param + update for param, update in zip(params, updates)] optimizer.update_params(params, grads) for exp, param in zip(expected, params): assert_array_equal(exp, param)