def check_cost(
    cost: QuadCost,
    n_state: int,
    n_ctrl: int,
    horizon: int,
    stationary: bool,
    linear: bool,
):
    """Assert that a quadratic cost matches the requested configuration.

    Checks, in order: both components are tensors; both have the expected
    horizon length and ``n_state + n_ctrl`` rows (and as many columns for the
    quadratic term); the quadratic term has no negative eigenvalue according
    to ``torch.linalg.eigh``; the linear term is all zeros unless ``linear``
    is set; and, when ``horizon > 1``, each checked component is constant
    along the "H" dimension exactly when ``stationary`` is set.
    """
    assert_all_tensor(*cost)
    quad_term, lin_term = cost
    tau_size = n_state + n_ctrl

    for term in (quad_term, lin_term):
        assert_horizon_len(term, horizon)
    for term in (quad_term, lin_term):
        assert_row_size(term, tau_size)
    assert_col_size(quad_term, tau_size)

    eigenvalues = torch.linalg.eigh(nt.unnamed(quad_term))[0]
    assert (eigenvalues >= 0).all()

    if not linear:
        # Without a linear term the coefficient vector must vanish.
        assert nt.allclose(lin_term, torch.zeros_like(lin_term))

    if horizon > 1:
        assert stationary == nt.allclose(quad_term, quad_term.select("H", 0))
        if linear:
            # The linear term is only checked for stationarity when present.
            assert stationary == nt.allclose(lin_term, lin_term.select("H", 0))
def isstationary(dynamics: AnyDynamics) -> bool:
    """Tell whether the dynamics are constant along the "H" dimension.

    ``F`` and ``f`` are always compared against their first "H" slice. ``W``
    is compared as well unless ``dynamics`` is a ``LinDynamics`` instance.
    """

    def matches_first_step(tensor) -> bool:
        # Compare the whole tensor against its slice at index 0 of "H".
        return nt.allclose(tensor, tensor.select("H", 0))

    if not matches_first_step(dynamics.F):
        return False
    if not matches_first_step(dynamics.f):
        return False
    if isinstance(dynamics, LinDynamics):
        return True
    return matches_first_step(dynamics.W)
def test_terminal(self, module, last_obs: Tensor, act: Tensor):
    """Check the module outputs zero on ``last_obs`` with zero input gradients."""
    zero = torch.zeros([])

    output = module(last_obs, act)
    assert torch.is_tensor(output)
    assert nt.allclose(output, zero)

    output.sum().backward()
    assert last_obs.grad is not None
    assert act.grad is not None
    for grad in (last_obs.grad, act.grad):
        assert nt.allclose(grad, zero)
def test_gains(self, module: TVLinearFeedback):
    """Check that ``gains()`` returns tensors matching ``module.K``/``module.k``.

    Every parameter is first given a random gradient so that the returned
    gains can also be checked for carrying the same ``.grad`` values.
    """
    for param in module.parameters():
        param.grad = torch.randn_like(param)

    gain_matrix, gain_bias = module.gains()
    assert nt.allclose(gain_matrix, module.K)
    assert nt.allclose(gain_bias, module.k)

    for gain in (gain_matrix, gain_bias):
        assert gain.grad is not None

    assert nt.allclose(gain_matrix.grad, module.K.grad)
    assert nt.allclose(gain_bias.grad, module.k.grad)
def test_call(self, module: QuadraticReward, obs: Tensor, act: Tensor):
    """Check the module output is finite and backpropagates as expected.

    After backpropagating the summed output, the first part unpacked from the
    observation gradient must be finite and non-zero, the second part must be
    zero, and the action gradient must be finite and non-zero.
    """
    reward = module(obs, act)
    assert torch.is_tensor(reward)
    assert torch.isfinite(reward).all()

    reward.sum().backward()
    assert obs.grad is not None
    assert act.grad is not None

    state_grad, time_grad = unpack_obs(nt.vector(obs.grad))
    assert not nt.allclose(state_grad, torch.zeros_like(state_grad))
    assert torch.isfinite(state_grad).all()
    assert nt.allclose(time_grad, torch.zeros_like(time_grad))

    act_grad = act.grad
    assert not nt.allclose(act_grad, torch.zeros_like(act))
    assert torch.isfinite(act_grad).all()
def check_dynamics_covariance(
    W: Tensor,
    n_state: int,
    horizon: int,
    stationary: bool,
    sample_covariance: bool,
):
    """Assert that a covariance tensor ``W`` matches the requested setup.

    Checks, in order, that ``W``:

    * has the expected horizon length and ``n_state`` rows and columns;
    * is close to its own transpose;
    * has strictly positive eigenvalues according to ``torch.linalg.eigh``;
    * is close to the identity exactly when ``sample_covariance`` is false;
    * when ``horizon != 1`` and ``sample_covariance`` is truthy, is constant
      along the "H" dimension exactly when ``stationary`` is set.

    Args:
        W: Covariance tensor to validate.
        n_state: Expected row and column size.
        horizon: Expected length along the horizon.
        stationary: Whether ``W`` is expected to be constant along "H". It is
            a boolean flag compared against the boolean ``nt.allclose`` result.
        sample_covariance: Whether ``W`` is expected to differ from identity.

    Raises:
        AssertionError: If any of the checks above fails.
    """
    assert_horizon_len(W, horizon)
    assert_row_size(W, n_state)
    assert_col_size(W, n_state)
    # Symmetry is verified before calling eigh.
    assert nt.allclose(W, nt.transpose(W))
    eigval, _ = torch.linalg.eigh(nt.unnamed(W))
    assert eigval.gt(0).all()
    assert sample_covariance != nt.allclose(W, nt.matrix(torch.eye(n_state)))
    # noinspection PyTypeChecker
    assert (
        horizon == 1
        or not sample_covariance
        or stationary == nt.allclose(W, W.select("H", 0))
    )
def test_detach_linear(self, module: TVLinearFeedback, linear: lqr.Linear):
    """Check that ``copy_`` does not tie module parameters to ``linear``.

    After copying, every module parameter is shifted in place; the tensors in
    ``linear`` must still match the snapshot taken beforehand.
    """
    snapshot = [tensor.clone() for tensor in linear]

    module.copy_(linear)
    for param in module.parameters():
        param.data.add_(1.0)

    unchanged = [nt.allclose(old, new) for old, new in zip(snapshot, linear)]
    assert all(unchanged)
def test_stabilizing_policy(
    self,
    dynamics: lqr.LinSDynamics,
    n_state: int,
    n_ctrl: int,
    horizon: int,
    seed: int,
):
    """Check the shape of the stabilizing policy and the closed-loop spectrum.

    The gain ``K`` must be a finite tensor of size ``n_ctrl`` x ``n_state``,
    the bias ``k`` must be all zeros with ``n_ctrl`` rows, and both must span
    ``horizon`` steps. With ``A`` and ``B`` taken from
    ``stationary_dynamics_factors`` and ``K`` taken at index 0 of "H", every
    eigenvalue of ``A + B @ K`` must have magnitude below one.
    """
    gain, bias = stabilizing_policy(dynamics, rng=seed)

    assert torch.is_tensor(gain)
    assert torch.isfinite(gain).all()
    assert gain.size("R") == n_ctrl
    assert gain.size("C") == n_state

    assert torch.is_tensor(bias)
    assert nt.allclose(bias, torch.zeros_like(bias))
    assert bias.size("R") == n_ctrl

    assert gain.size("H") == bias.size("H") == horizon

    A, B = [factor.numpy() for factor in stationary_dynamics_factors(dynamics)]
    # noinspection PyTypeChecker
    first_gain = gain.select("H", 0).numpy()
    closed_loop = A + B @ first_gain
    eigenvalues = np.linalg.eig(closed_loop)[0]
    assert np.all(np.abs(eigenvalues) < 1.0)
def test_cholesky(spdm: Tensor):
    """Check that ``nt.cholesky`` yields a valid factor of ``spdm``.

    The factor must share shape, names and dtype with the input, have a
    non-negative diagonal, and reproduce the input when multiplied by its
    transpose.
    """
    factor = nt.cholesky(spdm)

    for attribute in ("shape", "names", "dtype"):
        assert getattr(factor, attribute) == getattr(spdm, attribute)

    assert (nt.diagonal(factor) >= 0).all()

    reconstructed = factor @ nt.transpose(factor)
    assert nt.allclose(reconstructed, spdm)
def test_make_linsdynamics(
    lindynamics: LinDynamics,
    n_state: int,
    horizon: int,
    stationary: bool,
    sample_covariance: bool,
):
    """Check that ``make_linsdynamics`` keeps ``F``/``f`` and adds a valid ``W``."""
    result = make_linsdynamics(
        lindynamics,
        stationary=stationary,
        sample_covariance=sample_covariance,
    )
    F_in, f_in = lindynamics
    F_out, f_out, covariance = result

    for given, returned in ((F_in, F_out), (f_in, f_out)):
        assert nt.allclose(given, returned)

    check_dynamics_covariance(
        covariance, n_state, horizon, stationary, sample_covariance
    )
def check_dynamics(
    dynamics: Union[LinDynamics, LinSDynamics],
    n_state: int,
    n_ctrl: int,
    horizon: int,
    stationary: bool,
    controllable: bool,
    transition_bias: bool,
    sample_covariance: Optional[bool] = None,
):  # pylint:disable=too-many-locals
    """Assert that linear dynamics match the requested configuration.

    Checks tensor types, horizon length and row/column sizes of ``F`` and
    ``f``. If ``controllable`` is set, the matrix obtained by concatenating
    ``A^i @ B`` for ``i`` in ``range(n_state)`` must have rank ``n_state``.
    Without ``transition_bias`` the bias ``f`` must be zero. When
    ``horizon > 1``, the checked components must be constant along "H"
    exactly when ``stationary`` is set. If a covariance ``W`` is present it
    is forwarded to ``check_dynamics_covariance``.
    """
    assert_all_tensor(*dynamics)

    if isinstance(dynamics, LinDynamics):
        F, f = dynamics
        W = None
    else:
        F, f, W = dynamics

    for tensor in (F, f):
        assert_horizon_len(tensor, horizon)
    for tensor in (F, f):
        assert_row_size(tensor, n_state)
    assert_col_size(F, n_state + n_ctrl)

    if controllable:
        A, B = stationary_dynamics_factors(dynamics)
        A, B = A.numpy(), B.numpy()
        blocks = [np.linalg.matrix_power(A, power) @ B for power in range(n_state)]
        ctrb = np.concatenate(blocks, axis=-1)
        assert np.linalg.matrix_rank(ctrb) == n_state

    if not transition_bias:
        assert nt.allclose(torch.zeros_like(f), f)

    if horizon > 1:
        assert stationary == nt.allclose(F, F.select("H", 0))
        if transition_bias:
            # The bias is only checked for stationarity when it is in use.
            assert stationary == nt.allclose(f, f.select("H", 0))

    if W is not None:
        check_dynamics_covariance(W, n_state, horizon, stationary, sample_covariance)
def test_from_existing(self, dynamics: LinSDynamics, stationary: bool):
    """Check that ``from_existing`` does not share storage with ``dynamics``.

    Module parameters are shifted in place after construction; the tensors in
    ``dynamics`` must still match the snapshot taken beforehand.
    """
    snapshot = [tensor.clone() for tensor in dynamics]

    module = LinearDynamicsModule.from_existing(dynamics, stationary=stationary)
    for param in module.parameters():
        param.data.sub_(1.0)

    assert all(nt.allclose(old, new) for old, new in zip(snapshot, dynamics))
def test_init(self, module: CholeskyFactor, shape: tuple[int, ...]):
    """Check attributes, parameter shapes and the initial output of the module.

    A fresh module must expose ``beta``, ``ltril`` and ``pre_diag``; the last
    two must be ``nn.Parameter`` objects of shape ``shape`` and ``shape[:-1]``
    respectively; and calling the module must return an identity matrix.
    """
    for attribute in ("beta", "ltril", "pre_diag"):
        assert hasattr(module, attribute)

    for param in (module.ltril, module.pre_diag):
        assert isinstance(param, nn.Parameter)

    assert module.ltril.shape == shape
    assert module.pre_diag.shape == shape[:-1]

    factor = module()
    identity = nt.matrix(torch.eye(shape[-1]))
    assert nt.allclose(factor, identity)
def test_terminal_value(self, qvalue: QuadQValue, last_obs: Tensor, act: Tensor):
    """Check the Q-value on ``last_obs`` is zero with zero input gradients."""
    value = qvalue(last_obs, act)

    assert torch.is_tensor(value)
    assert value.shape == last_obs.shape[:-1] == act.shape[:-1]
    assert value.dtype == last_obs.dtype == act.dtype
    assert nt.allclose(value, torch.zeros_like(value))

    value.mean().backward()
    assert last_obs.grad is not None
    assert act.grad is not None
    for grad in (last_obs.grad, act.grad):
        assert torch.allclose(grad, torch.zeros_like(grad))
def test_absorving(self, module: StochasticModel, last_obs: Tensor, act: Tensor):
    """Check that sampling from ``last_obs`` returns it with zero log-prob.

    The sample must share shape and names with ``last_obs`` and unpack to the
    same state and time. Backpropagating the summed sample must give a
    gradient of ones on the state part and zeros on the time part of
    ``last_obs``, and zeros on ``act``. The log-probability must have the
    shape and names of ``last_obs`` without the "R" dimension, be zero, and
    backpropagate zero gradients to both inputs.
    """
    zero = torch.zeros(())

    dist_params = module(last_obs, act)
    sample, logp = module.rsample(dist_params)

    # The sample mirrors the input observation.
    assert sample.shape == last_obs.shape
    assert sample.names == last_obs.names
    state, time = unpack_obs(last_obs)
    sampled_state, sampled_time = unpack_obs(sample)
    assert nt.allclose(state, sampled_state)
    assert time.eq(sampled_time).all()

    # Gradients of the sample with respect to the inputs.
    assert sample.grad_fn is not None
    sample.sum().backward(retain_graph=True)
    assert last_obs.grad is not None
    grad_parts = [torch.ones_like(state), torch.zeros_like(time)]
    expected_grad = torch.cat(grad_parts, dim="R")
    assert nt.allclose(last_obs.grad, expected_grad)
    assert nt.allclose(act.grad, zero)

    # Reset the gradients so the second backward pass is checked in isolation.
    last_obs.grad = None
    act.grad = None

    batch_dims = [
        (size, name)
        for size, name in zip(last_obs.shape, last_obs.names)
        if name != "R"
    ]
    assert logp.shape == tuple(size for size, _ in batch_dims)
    assert logp.names == tuple(name for _, name in batch_dims)
    assert nt.allclose(logp, zero)

    logp.sum().backward()
    assert nt.allclose(last_obs.grad, zero)
    assert nt.allclose(act.grad, zero)
def test_log_prob(
    self, module: StochasticModel, obs: Tensor, act: Tensor, new_obs: Tensor
):
    """Check ``log_prob`` output and that it backpropagates non-zero gradients.

    The log-probability must be a finite tensor sharing shape and names with
    the scalar time components of ``obs`` and ``new_obs``. After
    backpropagating its sum, ``obs``, ``act`` and every module parameter must
    hold a gradient that is not all zeros.
    """
    zero = torch.zeros(())

    dist_params = module(obs, act)
    log_prob = module.log_prob(new_obs, dist_params)

    _, cur_time = unpack_obs(obs)
    _, next_time = unpack_obs(new_obs)
    cur_time, next_time = nt.vector_to_scalar(cur_time, next_time)

    assert torch.is_tensor(log_prob)
    assert torch.isfinite(log_prob).all()
    assert log_prob.shape == cur_time.shape == next_time.shape
    assert log_prob.names == cur_time.names == next_time.names

    assert log_prob.grad_fn is not None
    log_prob.sum().backward()
    assert obs.grad is not None
    assert act.grad is not None
    assert not nt.allclose(obs.grad, zero)
    assert not nt.allclose(act.grad, zero)

    param_grads = [param.grad for param in module.parameters()]
    assert all([grad is not None for grad in param_grads])
    assert all([not torch.allclose(grad, zero) for grad in param_grads])
def test_copy_(self, vvalue: QuadVValue, params: Quadratic):
    """Check that ``copy_`` changes parameters iff the quadratic differs.

    The module parameters must be left unchanged by ``copy_(params)`` exactly
    when ``params`` already matched the module's previous standard form.
    """

    def snapshot_parameters():
        # Clone so later in-place updates do not affect the snapshot.
        return [param.clone() for param in vvalue.parameters()]

    previous_form = tuple(tensor.clone() for tensor in vvalue.standard_form())
    params_before = snapshot_parameters()
    vvalue.copy_(params)
    params_after = snapshot_parameters()

    parameters_unchanged = all(
        [torch.allclose(old, new) for old, new in zip(params_before, params_after)]
    )
    quadratic_unchanged = all(
        [nt.allclose(new, old) for new, old in zip(params, previous_form)]
    )
    assert parameters_unchanged == quadratic_unchanged
def check_val_backprop(self, vvalue: QuadVValue, obs: Tensor):
    """Check the value output and that it backpropagates to ``obs``.

    ``obs`` must start without a gradient. The value must be a finite tensor
    with the shape of ``obs`` minus its last dimension and the same dtype.
    After backpropagating its mean, ``obs.grad`` must be set and not all zeros.
    """
    assert obs.grad is None

    value = vvalue(obs)
    assert torch.is_tensor(value)
    assert value.shape == obs.shape[:-1]
    assert value.dtype == obs.dtype
    assert torch.isfinite(value).all()

    vvalue.zero_grad()
    value.mean().backward()

    obs_grad = obs.grad
    assert obs_grad is not None
    assert not nt.allclose(obs_grad, torch.zeros_like(obs))
def test_call(self, module: CholeskyFactor, size: int):
    """Check the module output is finite and which entries receive gradient.

    After backpropagating the summed output, the upper triangle (diagonal
    included) of ``ltril.grad`` must be zero, while no strictly-lower entry
    of ``ltril.grad`` and no entry of ``pre_diag.grad`` may be close to zero.
    """
    zero = torch.zeros([])

    factor = module()
    assert torch.is_tensor(factor)
    assert torch.isfinite(factor).all()

    factor.sum().backward()

    ltril_grad = module.ltril.grad
    assert nt.allclose(torch.triu(ltril_grad, diagonal=0), zero)

    rows, cols = torch.tril_indices(size, size, offset=-1)
    strictly_lower_grad = ltril_grad[..., rows, cols]
    assert not torch.isclose(strictly_lower_grad, zero).any()

    assert not torch.isclose(module.pre_diag.grad, zero).any()
def test_standard_form(
    self, module: QuadraticReward, n_state: int, n_ctrl: int, horizon: int
):
    """Check the dimensions and gradients of the module's standard form.

    ``standard_form()`` must return an ``lqr.QuadCost`` whose dimensions are
    ``n_state + n_ctrl`` and ``horizon``. Backpropagating the sum of all its
    entries must give every module parameter a gradient of ones.
    """
    cost = module.standard_form()
    assert isinstance(cost, lqr.QuadCost)

    tau_size, cost_horizon = lqr.dims_from_cost(cost)
    assert tau_size == n_state + n_ctrl
    assert cost_horizon == horizon

    total = cost.C.sum() + cost.c.sum()
    total.backward()

    for param in module.parameters():
        assert param.grad is not None
        assert nt.allclose(param.grad, torch.ones_like(param))
def test_call(
    self,
    qvalue: QuadQValue,
    obs: Tensor,
    act: Tensor,
    n_state: int,
    n_ctrl: int,
    horizon: int,
):  # pylint:disable=too-many-arguments
    """Check the Q-value output and that it backpropagates to both inputs.

    The module must report ``n_tau == n_state + n_ctrl`` and the given
    ``horizon``. Its output must be a finite tensor with the inputs' shape
    minus the last dimension and the same dtype. After backpropagating the
    mean, both ``obs`` and ``act`` must hold a gradient that is not all zeros.
    """
    assert qvalue.n_tau == n_state + n_ctrl
    assert qvalue.horizon == horizon

    value = qvalue(obs, act)
    assert torch.is_tensor(value)
    assert value.shape == obs.shape[:-1] == act.shape[:-1]
    assert value.dtype == obs.dtype == act.dtype
    assert torch.isfinite(value).all()

    value.mean().backward()
    for inputs in (obs, act):
        assert inputs.grad is not None
        assert not nt.allclose(inputs.grad, torch.zeros_like(inputs))
def test_log_prob(self, module: InitStateDynamics, obs: Tensor):
    """Check ``log_prob`` output and that it backpropagates non-zero gradients.

    The log-probability must be a float32 tensor with the shape of ``obs``
    minus its last dimension and the names of the scalar time component.
    After backpropagating its sum, ``obs`` and every module parameter must
    hold a gradient that is not all zeros.
    """
    log_prob = module.log_prob(obs)
    assert log_prob.shape == obs.shape[:-1]
    assert log_prob.dtype == torch.float32

    _, time = unpack_obs(obs)
    assert log_prob.names == nt.vector_to_scalar(time).names

    assert log_prob.grad_fn is not None
    log_prob.sum().backward()

    obs_grad = obs.grad
    assert obs_grad is not None
    assert not nt.allclose(obs_grad, torch.zeros_like(obs_grad))

    param_grads = [param.grad for param in module.parameters()]
    assert all([grad is not None for grad in param_grads])
    assert all(
        [not torch.allclose(grad, torch.zeros_like(grad)) for grad in param_grads]
    )
def test_terminal_call(self, module: TVLinearPolicy, last_obs: Tensor, n_ctrl: int):
    """Check the policy outputs a zero action on ``last_obs`` with zero grads.

    The action must be a finite, all-zero tensor with the names of
    ``last_obs`` and ``n_ctrl`` entries along "R". After backpropagating its
    sum, ``last_obs`` and every module parameter must hold a zero gradient.
    """
    zero = torch.zeros(())

    action = module(last_obs)
    assert nt.allclose(action, zero)
    assert torch.is_tensor(action)
    assert torch.isfinite(action).all()
    assert action.names == last_obs.names
    assert action.size("R") == n_ctrl

    action.sum().backward()
    assert last_obs.grad is not None
    assert torch.allclose(last_obs.grad, zero)

    param_grads = [param.grad for param in module.parameters()]
    assert all([grad is not None for grad in param_grads])
    assert all([torch.allclose(grad, zero) for grad in param_grads])
def test_from_existing(self, init: lqr.GaussInit):
    """Check that ``from_existing`` reproduces ``init`` in its standard form."""
    module = InitStateDynamics.from_existing(init)
    for given, recovered in zip(init, module.standard_form()):
        assert nt.allclose(given, recovered)
def test_from_existing(self, linear: lqr.Linear):
    """Check that ``from_existing`` yields gains matching ``linear``."""
    module = TVLinearFeedback.from_existing(linear)
    gains = module.gains()
    matches = [nt.allclose(gain, given) for gain, given in zip(gains, linear)]
    assert all(matches)
def check_quadratic_parameters(module: QuadraticMixin, quadratic: Quadratic):
    """Assert the module's ``quad``/``linear``/``const`` match ``quadratic``."""
    quad, linear, const = quadratic
    expected = (("quad", quad), ("linear", linear), ("const", const))
    for attribute, value in expected:
        assert nt.allclose(getattr(module, attribute), value)
def allclose_cost(cost1: QuadCost, cost2: QuadCost) -> bool:
    """Return whether every paired component of the two costs is close."""
    # Materialize the list so that every pair is compared before reducing.
    return all(list(map(nt.allclose, cost1, cost2)))
def allclose_dynamics(dyn1: LinSDynamics, dyn2: LinSDynamics) -> bool:
    """Return whether every paired component of the two dynamics is close."""
    # Materialize the list so that every pair is compared before reducing.
    return all(list(map(nt.allclose, dyn1, dyn2)))