Example #1
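A gradient-check test for box-constrained MPC with a neural-network dynamics module: the optimal controls returned by mpc.MPC are differentiated analytically with respect to the quadratic cost terms and the dynamics network's first-layer bias, then compared against finite-difference Jacobians.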
import numpy as np
import numpy.random as npr
import numpy.testing as npt
import numdifftools as nd

import torch
from torch.autograd import Variable, grad

from mpc import mpc, util
from mpc.mpc import GradMethods, QuadCost
from mpc.dynamics import NNDynamics


def test_lqr_backward_cost_nn_dynamics_module_constrained():
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 2
    hidden_sizes = [10, 10]
    n_sc = n_state + n_ctrl

    C = 10.*npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10.*npr.randn(T, n_batch, n_sc).astype(np.float64)

    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    beta = 1.
    u_lower = -beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)

    dynamics = NNDynamics(
        n_state, n_ctrl, hidden_sizes, activation='sigmoid').double()
    fc0b = dynamics.fcs[0].bias.view(-1).data.numpy().copy()

    def forward_numpy(C, c, x_init, u_lower, u_upper, fc0b):
        _C, _c, _x_init, _u_lower, _u_upper, _fc0b = [
            Variable(torch.Tensor(x).double()) if x is not None else None
            for x in [C, c, x_init, u_lower, u_upper, fc0b]
        ]

        # Load the candidate bias into the dynamics module before solving.
        dynamics.fcs[0].bias.data[:] = _fc0b.data
        u_init = None
        x_lqr, u_lqr, objs_lqr = mpc.MPC(
            n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
            lqr_iter=40,
            verbose=-1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=1,
        )(_x_init, QuadCost(_C, _c), dynamics)
        return util.get_data_maybe(u_lqr.view(-1)).numpy()

    def f_c(c_flat):
        c_ = c_flat.reshape(T, n_batch, n_sc)
        return forward_numpy(C, c_, x_init, u_lower, u_upper, fc0b)

    def f_fc0b(fc0b):
        return forward_numpy(C, c, x_init, u_lower, u_upper, fc0b)

    u = forward_numpy(C, c, x_init, u_lower, u_upper, fc0b)

    # Make sure some controls are clipped at the bounds and others are
    # strictly interior.
    assert np.any(u == u_lower.reshape(-1)) or np.any(u == u_upper.reshape(-1))
    assert np.any((u != u_lower.reshape(-1)) & (u != u_upper.reshape(-1)))

    du_dc_fd = nd.Jacobian(f_c)(c.reshape(-1))
    du_dfc0b_fd = nd.Jacobian(f_fc0b)(fc0b.reshape(-1))

    dynamics.fcs[0].bias.data = torch.DoubleTensor(fc0b).clone()

    _C, _c, _x_init, _u_lower, _u_upper, _fc0b = [
        Variable(torch.Tensor(x).double(), requires_grad=True)
        if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper, fc0b]
    ]
    # (_fc0b is unused below; gradients w.r.t. the bias are taken through
    # the dynamics module's parameter directly.)

    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
        lqr_iter=20,
        verbose=-1,
        max_linesearch_iter=1,
        grad_method=GradMethods.ANALYTIC,
    )(_x_init, QuadCost(_C, _c), dynamics)
    u_lqr_flat = u_lqr.view(-1)

    du_dC = []
    du_dc = []
    du_dfc0b = []
    for i in range(len(u_lqr_flat)):
        dCi = grad(u_lqr_flat[i], [_C], retain_graph=True)[0].view(-1)
        dci = grad(u_lqr_flat[i], [_c], retain_graph=True)[0].view(-1)
        dfc0b = grad(u_lqr_flat[i], [dynamics.fcs[0].bias],
                     retain_graph=True)[0].view(-1)
        du_dC.append(dCi)
        du_dc.append(dci)
        du_dfc0b.append(dfc0b)
    du_dC = torch.stack(du_dC).data.numpy()
    du_dc = torch.stack(du_dc).data.numpy()
    du_dfc0b = torch.stack(du_dfc0b).data.numpy()

    npt.assert_allclose(du_dc_fd, du_dc, atol=1e-3)
    npt.assert_allclose(du_dfc0b_fd, du_dfc0b, atol=1e-3)
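
The finite-difference reference Jacobians above come from numdifftools. A minimal standalone sketch of that pattern, with a made-up function g and evaluation point for illustration:

import numpy as np
import numdifftools as nd

# nd.Jacobian wraps a vector -> vector function and returns a callable
# that evaluates a finite-difference Jacobian at a point.
def g(z):
    return np.array([z[0] * z[1], np.sin(z[0])])

J = nd.Jacobian(g)(np.array([1.0, 2.0]))
# J[i, j] approximates dg_i/dz_j; here J is approximately
# [[2.0, 1.0], [0.5403, 0.0]].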
Example #2
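A receding-horizon control loop excerpted from a longer script (definitions of nx, nu, TIMESTEPS, N_BATCH, ACTION_LOW/ACTION_HIGH, LQR_ITER, u_init, env, and the goal terms precede it): a time-invariant quadratic cost toward a goal state is built once, and at every environment step the controller is re-created and re-solved, warm-started with the previous plan shifted by one timestep.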
    # goal_weights, goal_state, and ctrl_penalty are defined above this excerpt.
    q = torch.cat((goal_weights, ctrl_penalty * torch.ones(nu)))  # nx + nu
    px = -torch.sqrt(goal_weights) * goal_state
    p = torch.cat((px, torch.zeros(nu)))
    Q = torch.diag(q).repeat(TIMESTEPS, N_BATCH, 1, 1)  # T x B x nx+nu x nx+nu
    p = p.repeat(TIMESTEPS, N_BATCH, 1)
    cost = mpc.QuadCost(Q, p)  # T x B x nx+nu (linear component of cost)

    # run MPC
    total_reward = 0
    for i in range(run_iter):
        state = env.state.copy()
        state = torch.tensor(state).view(1, -1).float()
        command_start = time.perf_counter()
        # Recreate the controller with the updated warm start u_init
        # (rebuilding it from scratch each step is somewhat wasteful).
        ctrl = mpc.MPC(nx, nu, TIMESTEPS, u_lower=ACTION_LOW, u_upper=ACTION_HIGH, lqr_iter=LQR_ITER,
                       exit_unconverged=False, eps=1e-2,
                       n_batch=N_BATCH, backprop=False, verbose=0, u_init=u_init,
                       grad_method=mpc.GradMethods.AUTO_DIFF)

        # compute action based on current state, dynamics, and cost
        # nominal_states, nominal_actions, nominal_objs = ctrl(state, cost, PendulumDynamics())
        nominal_states, nominal_actions, nominal_objs = ctrl(state, cost, MountainCarDynamics())
        action = nominal_actions[0]  # take the first planned action
        # Warm-start the next solve: shift the plan one step and pad with zeros.
        u_init = torch.cat((nominal_actions[1:], torch.zeros(1, N_BATCH, nu)), dim=0)

        elapsed = time.perf_counter() - command_start
        s, r, _, _ = env.step(action.detach().numpy())
        total_reward += r
        logger.debug("action taken: %.4f cost received: %.4f time taken: %.5fs", action, -r, elapsed)
        if render:
            env.render()
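
MountainCarDynamics is not defined in this excerpt. With grad_method=mpc.GradMethods.AUTO_DIFF, mpc.MPC only needs the dynamics to be a differentiable callable mapping a batched (state, action) pair to the next state. Below is a hypothetical sketch based on the classic continuous mountain-car update; the constants and the omission of track-boundary clipping are assumptions:

import torch


class MountainCarDynamics(torch.nn.Module):
    # Assumed update, following gym's MountainCarContinuous:
    #   v' = v + 0.0015 * u - 0.0025 * cos(3 x)
    #   x' = x + v'
    # Track-boundary clipping is omitted to keep the dynamics smooth
    # for autodiff.
    def forward(self, state, action):
        x, v = state[..., 0], state[..., 1]
        u = torch.clamp(action[..., 0], -1., 1.)
        v_next = v + 0.0015 * u - 0.0025 * torch.cos(3. * x)
        x_next = x + v_next
        return torch.stack((x_next, v_next), dim=-1)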
Example #3
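The same gradient-check pattern as Example #1, but with known linear dynamics (LinDx): analytic derivatives of the constrained controls with respect to the cost, the dynamics terms F and f, and the initial state are compared against finite differences.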
import numpy as np
import numpy.random as npr
import numpy.testing as npt
import numdifftools as nd

import torch
from torch.autograd import Variable, grad

from mpc import mpc, util
from mpc.mpc import QuadCost, LinDx


def test_lqr_backward_cost_linear_dynamics_constrained():
    npr.seed(0)
    torch.manual_seed(0)
    n_batch, n_state, n_ctrl, T = 1, 2, 2, 3
    n_sc = n_state + n_ctrl

    C = 10.*npr.randn(T, n_batch, n_sc, n_sc).astype(np.float64)
    C = np.matmul(C.transpose(0, 1, 3, 2), C)
    c = 10.*npr.randn(T, n_batch, n_sc).astype(np.float64)

    x_init = npr.randn(n_batch, n_state).astype(np.float64)
    beta = 0.5
    u_lower = -beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)
    u_upper = beta*np.ones((T, n_batch, n_ctrl)).astype(np.float64)

    F = npr.randn(T-1, n_batch, n_state, n_sc)
    f = npr.randn(T-1, n_batch, n_state)

    def forward_numpy(C, c, x_init, u_lower, u_upper, F, f):
        _C, _c, _x_init, _u_lower, _u_upper, _F, _f = [
            Variable(torch.Tensor(x).double()) if x is not None else None
            for x in [C, c, x_init, u_lower, u_upper, F, f]
        ]

        u_init = None
        x_lqr, u_lqr, objs_lqr = mpc.MPC(
            n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
            lqr_iter=40,
            verbose=1,
            exit_unconverged=True,
            backprop=False,
            max_linesearch_iter=2,
        )(_x_init, QuadCost(_C, _c), LinDx(_F, _f))
        return util.get_data_maybe(u_lqr.view(-1)).numpy()

    def f_c(c_flat):
        c_ = c_flat.reshape(T, n_batch, n_sc)
        return forward_numpy(C, c_, x_init, u_lower, u_upper, F, f)

    def f_F(F_flat):
        F_ = F_flat.reshape(T-1, n_batch, n_state, n_sc)
        return forward_numpy(C, c, x_init, u_lower, u_upper, F_, f)

    def f_f(f_flat):
        f_ = f_flat.reshape(T-1, n_batch, n_state)
        return forward_numpy(C, c, x_init, u_lower, u_upper, F, f_)

    def f_x_init(x_init):
        x_init = x_init.reshape(1, -1)
        return forward_numpy(C, c, x_init, u_lower, u_upper, F, f)

    u = forward_numpy(C, c, x_init, u_lower, u_upper, F, f)

    # Make sure some controls are clipped at the bounds and others are
    # strictly interior.
    assert np.any(u == u_lower.reshape(-1)) or np.any(u == u_upper.reshape(-1))
    assert np.any((u != u_lower.reshape(-1)) & (u != u_upper.reshape(-1)))

    du_dc_fd = nd.Jacobian(f_c)(c.reshape(-1))
    du_dF_fd = nd.Jacobian(f_F)(F.reshape(-1))
    du_df_fd = nd.Jacobian(f_f)(f.reshape(-1))
    du_dxinit_fd = nd.Jacobian(f_x_init)(x_init[0])

    _C, _c, _x_init, _u_lower, _u_upper, _F, _f = [
        Variable(torch.Tensor(x).double(), requires_grad=True)
        if x is not None else None
        for x in [C, c, x_init, u_lower, u_upper, F, f]
    ]

    u_init = None
    x_lqr, u_lqr, objs_lqr = mpc.MPC(
        n_state, n_ctrl, T, _u_lower, _u_upper, u_init,
        lqr_iter=20,
        verbose=1,
    )(_x_init, QuadCost(_C, _c), LinDx(_F, _f))
    u_lqr_flat = u_lqr.view(-1)

    du_dC = []
    du_dc = []
    du_dF = []
    du_df = []
    du_dx_init = []
    for i in range(len(u_lqr_flat)):
        dCi = grad(u_lqr_flat[i], [_C], retain_graph=True)[0].view(-1)
        dci = grad(u_lqr_flat[i], [_c], retain_graph=True)[0].view(-1)
        dF = grad(u_lqr_flat[i], [_F], retain_graph=True)[0].view(-1)
        df = grad(u_lqr_flat[i], [_f], retain_graph=True)[0].view(-1)
        dx_init = grad(u_lqr_flat[i], [_x_init], retain_graph=True)[0].view(-1)
        du_dC.append(dCi)
        du_dc.append(dci)
        du_dF.append(dF)
        du_df.append(df)
        du_dx_init.append(dx_init)
    du_dC = torch.stack(du_dC).data.numpy()
    du_dc = torch.stack(du_dc).data.numpy()
    du_dF = torch.stack(du_dF).data.numpy()
    du_df = torch.stack(du_df).data.numpy()
    du_dx_init = torch.stack(du_dx_init).data.numpy()

    npt.assert_allclose(du_dc_fd, du_dc, atol=1e-4)
    npt.assert_allclose(du_dF_fd, du_dF, atol=1e-4)
    npt.assert_allclose(du_df_fd, du_df, atol=1e-4)
    npt.assert_allclose(du_dxinit_fd, du_dx_init, atol=1e-4)
Example #4
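A system-identification script that learns linear dynamics by imitation: both the true model and the learned model (A_model, B_model) are planned through mpc.MPC, and the mean-squared error between the two planned control sequences is backpropagated through the controller into the model parameters.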
import argparse
import json
import os
import shutil

import numpy as np
import numpy.random as npr
import setproctitle

import torch
import torch.optim as optim
from torch.autograd import Variable
from torch.nn.parameter import Parameter

from mpc import mpc
from mpc.mpc import QuadCost, LinDx


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_state', type=int, default=3)
    parser.add_argument('--n_ctrl', type=int, default=3)
    parser.add_argument('--T', type=int, default=5)
    parser.add_argument('--save', type=str)
    parser.add_argument('--work', type=str, default='work')
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--seed', type=int, default=0)
    args = parser.parse_args()

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    t = '.'.join([
        "{}={}".format(x, getattr(args, x))
        for x in ['n_state', 'n_ctrl', 'T']
    ])
    setproctitle.setproctitle('bamos.' + t + '.{}'.format(args.seed))
    if args.save is None:
        args.save = os.path.join(args.work, t, str(args.seed))

    if os.path.exists(args.save):
        shutil.rmtree(args.save)
    os.makedirs(args.save, exist_ok=True)

    meta_file = os.path.join(args.save, 'meta.json')
    meta = create_experiment(args.n_state, args.n_ctrl, args.T)
    with open(meta_file, 'w') as f:
        json.dump(meta, f, indent=4)

    true_model = {}
    for k in ['Q', 'p', 'A', 'B']:
        v = torch.Tensor(np.array(meta[k])).double()
        if args.cuda:  # respect the --no-cuda flag computed above
            v = v.cuda()
        v = Variable(v)
        meta[k] = v
        true_model[k] = v

    n_state, n_ctrl, alpha = args.n_state, args.n_ctrl, meta['alpha']
    npr.seed(1)  # Intentionally 1 instead of args.seed so these are the same.
    A_model = np.eye(n_state) + alpha * npr.randn(n_state, n_state)
    B_model = npr.randn(n_state, n_ctrl)
    dtype = true_model['Q'].data.type()
    A_model = Parameter(torch.Tensor(A_model).type(dtype))
    B_model = Parameter(torch.Tensor(B_model).type(dtype))

    # u_lower, u_upper = -100., 100.
    u_lower, u_upper = -1., 1.
    u_init = None

    optimizer = optim.RMSprop((A_model, B_model), lr=1e-2)

    torch.manual_seed(args.seed)

    fname = os.path.join(args.save, 'losses.csv')
    loss_f = open(fname, 'w')
    loss_f.write('im_loss,mse\n')
    loss_f.flush()

    n_batch = 64
    for i in range(5000):
        x_init = Variable(1. * torch.randn(n_batch, n_state).type(dtype))
        optimizer.zero_grad()

        try:
            F = torch.cat((true_model['A'], true_model['B']), dim=1) \
                .unsqueeze(0).unsqueeze(0).repeat(args.T, n_batch, 1, 1)
            x_true, u_true, objs_true = mpc.MPC(
                n_state,
                n_ctrl,
                args.T,
                u_lower=u_lower,
                u_upper=u_upper,
                u_init=u_init,
                lqr_iter=100,
                verbose=-1,
                exit_unconverged=False,
                detach_unconverged=False,
                n_batch=n_batch,
            )(x_init, QuadCost(true_model['Q'], true_model['p']), LinDx(F))

            F = torch.cat((A_model, B_model), dim=1) \
                .unsqueeze(0).unsqueeze(0).repeat(args.T, n_batch, 1, 1)
            x_pred, u_pred, objs_pred = mpc.MPC(
                n_state,
                n_ctrl,
                args.T,
                u_lower=u_lower,
                u_upper=u_upper,
                u_init=u_init,
                lqr_iter=100,
                verbose=-1,
                exit_unconverged=False,
                detach_unconverged=False,
                n_batch=n_batch,
            )(x_init, QuadCost(true_model['Q'], true_model['p']), LinDx(F))

            # Imitation loss: match the expert's planned control sequence.
            # (An alternative is matching states: torch.mean((x_true - x_pred)**2).)
            traj_loss = torch.mean((u_true - u_pred)**2)
            traj_loss.backward()
            optimizer.step()

            model_loss = torch.mean((A_model-true_model['A'])**2) + \
                         torch.mean((B_model-true_model['B'])**2)

            loss_f.write('{},{}\n'.format(traj_loss.item(),
                                          model_loss.item()))
            loss_f.flush()

            plot_interval = 100
            if i % plot_interval == 0:
                os.system('./plot.py "{}" &'.format(args.save))
                print(A_model, true_model['A'])
            print('{:04d}: traj_loss: {:.4f} model_loss: {:.4f}'.format(
                i, traj_loss.item(), model_loss.item()))
        except KeyboardInterrupt:
            raise
        except Exception:
            raise
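
create_experiment is not shown in this excerpt. Below is a hypothetical sketch that is only meant to be consistent with how meta is used above: JSON-serializable entries under the keys 'Q', 'p', 'A', 'B', plus a perturbation scale 'alpha'. The particular matrices and distributions are assumptions:

import numpy as np
import numpy.random as npr


def create_experiment(n_state, n_ctrl, T):
    # T is unused in this minimal sketch.
    n_sc = n_state + n_ctrl
    alpha = 0.2
    return {
        'Q': np.eye(n_sc).tolist(),
        'p': npr.randn(n_sc).tolist(),
        'A': (np.eye(n_state) + alpha * npr.randn(n_state, n_state)).tolist(),
        'B': npr.randn(n_state, n_ctrl).tolist(),
        'alpha': alpha,
    }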