def test_grad_input():
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl = 2, 3, 4
    hidden_sizes = [42] * 5

    for act in ['relu', 'sigmoid']:
        x = Variable(torch.rand(n_batch, n_state), requires_grad=True)
        u = Variable(torch.rand(n_batch, n_ctrl), requires_grad=True)
        net = NNDynamics(
            n_state, n_ctrl, hidden_sizes=hidden_sizes, activation=act)
        x_ = net(x, u)

        # Build the batched Jacobians R = dx'/dx and S = dx'/du row by row
        # with autograd as the reference values.
        R, S = [], []
        for i in range(n_batch):
            Ri, Si = [], []
            for j in range(n_state):
                grad_xij, grad_uij = grad(x_[i, j], [x, u], create_graph=True)
                grad_xij = grad_xij[i]
                grad_uij = grad_uij[i]
                Ri.append(grad_xij)
                Si.append(grad_uij)
            R.append(torch.stack(Ri))
            S.append(torch.stack(Si))
        R = torch.stack(R)
        S = torch.stack(S)

        R_, S_ = net.grad_input(x, u)

        npt.assert_allclose(R.data.numpy(), R_.data.numpy(), rtol=1e-4)
        npt.assert_allclose(S.data.numpy(), S_.data.numpy(), rtol=1e-4)
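
# A minimal sketch (not one of the original assertions) of what the batched
# Jacobians checked above mean: R = dx'/dx and S = dx'/du give a first-order
# Taylor model of the dynamics around (x, u). The helper name
# `_taylor_step_sketch` and the perturbations `dx`, `du` (assumed to be the
# same type and shape as `x` and `u`) are illustrative only.
def _taylor_step_sketch(net, x, u, dx, du):
    x_ = net(x, u)                # nominal next state, (n_batch, n_state)
    R, S = net.grad_input(x, u)   # (n_batch, n_state, n_state), (n_batch, n_state, n_ctrl)
    # x'(x + dx, u + du) ~= x'(x, u) + R dx + S du for small perturbations.
    return x_ + R.bmm(dx.unsqueeze(2)).squeeze(2) \
        + S.bmm(du.unsqueeze(2)).squeeze(2)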
def test_lqr_linearization():
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 2, 3, 4, 5
    hidden_sizes = [10]
    n_sc = n_state + n_ctrl

    C = 10. * npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10. * npr.randn(T, n_batch, n_sc).astype(np.float64)
    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    # beta = 0.5
    beta = 2.0
    u_lower = -beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)

    _C, _c, _x_init, _u_lower, _u_upper = [
        Variable(torch.Tensor(x).double(), requires_grad=True)
        if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper]
    ]
    F = Variable(
        torch.randn(1, 1, n_state, n_sc).repeat(T - 1, 1, 1, 1).double(),
        requires_grad=True)

    dynamics = NNDynamics(
        n_state, n_ctrl, hidden_sizes, activation='sigmoid').double()

    u_init = None
    _lqr = mpc.MPC(
        n_state, n_ctrl, T, _x_init, _u_lower, _u_upper, u_init,
        grad_method=GradMethods.ANALYTIC,
    )

    u = torch.randn(T, n_batch, n_ctrl).type_as(_x_init.data)
    x = util.get_traj(T, u, x_init=_x_init, dynamics=dynamics)
    x = torch.stack(x, 0)

    Fan, fan = _lqr.linearize_dynamics(x, u, dynamics, diff=False)

    _lqr.grad_method = GradMethods.AUTO_DIFF
    Fau, fau = _lqr.linearize_dynamics(x, u, dynamics, diff=False)
    npt.assert_allclose(Fan.data.numpy(), Fau.data.numpy(), atol=1e-4)
    npt.assert_allclose(fan.data.numpy(), fau.data.numpy(), atol=1e-4)

    # Make sure diff version doesn't crash:
    Fau, fau = _lqr.linearize_dynamics(x, u, dynamics, diff=True)

    _lqr.grad_method = GradMethods.FINITE_DIFF
    Ff, ff = _lqr.linearize_dynamics(x, u, dynamics, diff=False)
    npt.assert_allclose(Fan.data.numpy(), Ff.data.numpy(), atol=1e-4)
    npt.assert_allclose(fan.data.numpy(), ff.data.numpy(), atol=1e-4)

    # Make sure diff version doesn't crash:
    Ff, ff = _lqr.linearize_dynamics(x, u, dynamics, diff=True)
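
# A minimal sketch (an assumption, not asserted by the test above) of the
# affine convention the linearization appears to use: around each point
# (x_t, u_t) of the rolled-out trajectory, x_{t+1} ~= F_t [x_t; u_t] + f_t,
# so the affine model should reproduce the true next state at the expansion
# points themselves. The helper name `_check_linearization_sketch` is
# illustrative only; call it as `_check_linearization_sketch(_lqr, x, u,
# dynamics)` with the objects built in the test.
def _check_linearization_sketch(lqr, x, u, dynamics):
    F, f = lqr.linearize_dynamics(x, u, dynamics, diff=False)
    F, f = util.get_data_maybe(F), util.get_data_maybe(f)
    for t in range(F.size(0)):
        xt = util.get_data_maybe(x[t])
        ut = util.get_data_maybe(u[t])
        x_true = util.get_data_maybe(dynamics(Variable(xt), Variable(ut)))
        xut = torch.cat((xt, ut), 1)  # (n_batch, n_state + n_ctrl)
        x_pred = F[t].bmm(xut.unsqueeze(2)).squeeze(2) + f[t]
        npt.assert_allclose(x_true.numpy(), x_pred.numpy(), atol=1e-4)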
def test_lqr_backward_cost_nn_dynamics_module_constrained():
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 2
    hidden_sizes = [10, 10]
    n_sc = n_state + n_ctrl

    C = 10. * npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10. * npr.randn(T, n_batch, n_sc).astype(np.float64)
    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    beta = 1.
    u_lower = -beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta * np.ones((T, n_batch, n_ctrl)).astype(np.float64)

    dynamics = NNDynamics(
        n_state, n_ctrl, hidden_sizes, activation='sigmoid').double()
    fc0b = dynamics.fcs[0].bias.view(-1).data.numpy().copy()

    def forward_numpy(C, c, x_init, u_lower, u_upper, fc0b):
        _C, _c, _x_init, _u_lower, _u_upper, fc0b = [
            Variable(torch.Tensor(x).double()) if x is not None else None
            for x in [C, c, x_init, u_lower, u_upper, fc0b]
        ]

        dynamics.fcs[0].bias.data[:] = fc0b.data
        # dynamics.A.data[:] = fc0b.view(n_state, n_state).data
        u_init = None
        x_lqr, u_lqr, objs_lqr = mpc.MPC(
            n_state, n_ctrl, T, _x_init, _u_lower, _u_upper, u_init,
            lqr_iter=40,
            verbose=-1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=1,
        )(_C, _c, dynamics)
        return util.get_data_maybe(u_lqr.view(-1)).numpy()

    def f_c(c_flat):
        c_ = c_flat.reshape(T, n_batch, n_sc)
        return forward_numpy(C, c_, x_init, u_lower, u_upper, fc0b)

    def f_fc0b(fc0b):
        return forward_numpy(C, c, x_init, u_lower, u_upper, fc0b)

    u = forward_numpy(C, c, x_init, u_lower, u_upper, fc0b)

    # Make sure some of the controls are on the bounds and others are
    # strictly inside them.
    assert np.any(u == u_lower.reshape(-1)) or np.any(u == u_upper.reshape(-1))
    assert np.any((u != u_lower.reshape(-1)) & (u != u_upper.reshape(-1)))

    # Finite-difference Jacobians of the solution w.r.t. the cost and the
    # first-layer bias of the dynamics network.
    du_dc_fd = nd.Jacobian(f_c)(c.reshape(-1))
    du_dfc0b_fd = nd.Jacobian(f_fc0b)(fc0b.reshape(-1))

    dynamics.fcs[0].bias.data = torch.DoubleTensor(fc0b).clone()
    _C, _c, _x_init, _u_lower, _u_upper, fc0b = [
        Variable(torch.Tensor(x).double(), requires_grad=True)
        if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper, fc0b]
    ]

    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _x_init, _u_lower, _u_upper, u_init,
        lqr_iter=20,
        verbose=-1,
        max_linesearch_iter=1,
        grad_method=GradMethods.ANALYTIC,
    )(_C, _c, dynamics)
    u_lqr_flat = u_lqr.view(-1)

    # Analytic (backpropagated) Jacobians, built row by row.
    du_dC = []
    du_dc = []
    du_dfc0b = []
    for i in range(len(u_lqr_flat)):
        dCi = grad(u_lqr_flat[i], [_C], create_graph=True)[0].view(-1)
        dci = grad(u_lqr_flat[i], [_c], create_graph=True)[0].view(-1)
        dfc0b = grad(u_lqr_flat[i], [dynamics.fcs[0].bias],
                     create_graph=True)[0].view(-1)
        du_dC.append(dCi)
        du_dc.append(dci)
        du_dfc0b.append(dfc0b)
    du_dC = torch.stack(du_dC).data.numpy()
    du_dc = torch.stack(du_dc).data.numpy()
    du_dfc0b = torch.stack(du_dfc0b).data.numpy()

    npt.assert_allclose(du_dc_fd, du_dc, atol=1e-3)
    npt.assert_allclose(du_dfc0b_fd, du_dfc0b, atol=1e-3)
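
# A minimal sketch (illustrative only; the helper name `_fd_jacobian_sketch`
# is not part of the original tests) of the central-difference Jacobian that
# `nd.Jacobian` estimates above: perturb one input coordinate at a time and
# difference the solver outputs. It could be applied to `f_c` or `f_fc0b`
# the same way numdifftools is, at the cost of more solver calls.
def _fd_jacobian_sketch(f, z, eps=1e-6):
    z = np.asarray(z, dtype=np.float64)
    y0 = f(z)
    J = np.zeros((y0.size, z.size))
    for j in range(z.size):
        dz = np.zeros_like(z)
        dz[j] = eps
        # Central difference in the j-th coordinate.
        J[:, j] = (f(z + dz) - f(z - dz)) / (2. * eps)
    return J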