def make_lqr_linear_navigation(
    goal: Union[np.ndarray, tuple[float, float]], beta: float, horizon: int
) -> tuple[LinDynamics, QuadCost, Box]:
    """Goal-oriented 2D Navigation task encoded as an LQR.

    Args:
        goal: 2D coordinates of goal position
        beta: Penalty coefficient for control magnitude
        horizon: Integer number of decision steps

    Returns:
        A tuple with the dynamics and cost returned by ``refine_lqr`` and the
        control bounds, a pair of tensors filled with -1 and +1 respectively.

    Source::
        https://github.com/renato-scaroni/backpropagation-planning/blob/master/src/Modules/Envs/lqr.py
    """
    target = np.asarray(goal)
    # State and control share the dimensionality of the goal vector
    n_state = n_ctrl = target.shape[0]

    # Transition matrix [I | I] with zero bias, repeated along the horizon.
    # Presumably s' = s + a under the usual F @ [s; a] + f convention.
    trans_mat = utils.np_expand_horizon(
        np.concatenate([np.identity(n_state), np.identity(n_ctrl)], axis=1),
        horizon,
    )
    trans_bias = np.zeros((horizon, n_state))

    # Quadratic cost term: 2 on the state diagonal, 2 * beta on the control
    # diagonal. Linear cost term: -2 * goal on the state, zero on the control.
    quad_diagonal = [2.0] * n_state + [2.0 * beta] * n_ctrl
    cost_mat = utils.np_expand_horizon(np.diag(quad_diagonal), horizon)
    cost_vec = utils.np_expand_horizon(
        np.concatenate([-2.0 * target, np.zeros((n_ctrl,))], axis=0), horizon
    )

    # Avoid tensor writing to un-writable np.array
    trans_mat, trans_bias, cost_mat, cost_vec = [
        as_float_tensor(array.copy())
        for array in (trans_mat, trans_bias, cost_mat, cost_vec)
    ]
    dynamics, cost = refine_lqr((trans_mat, trans_bias), (cost_mat, cost_vec))

    bounds: Box = (-torch.ones(n_ctrl), torch.ones(n_ctrl))
    return dynamics, cost, bounds
def make_gaussinit(
    state_size: int,
    n_batch: Optional[int] = None,
    sample_covariance: bool = False,
    rng: RNG = None,
) -> GaussInit:
    """Generate parameters for Gaussian initial state distribution.

    The mean is always a zero vector. The covariance is the identity matrix
    unless `sample_covariance` is set.

    Args:
        state_size: size of state vector
        n_batch: batch size, if any
        sample_covariance: whether to sample a random SPD matrix for the
            Gaussian covariance or use the identity matrix.
        rng: random number generator, seed, or None. Only used when
            `sample_covariance` is True.

    Returns:
        A ``GaussInit`` holding the mean (``mu``) and covariance (``sig``).
    """
    # pylint:disable=invalid-name
    batch_shape = (n_batch,) if n_batch is not None else ()
    mean = torch.zeros(batch_shape + (state_size,))

    if not sample_covariance:
        cov = torch.eye(state_size)
    else:
        cov = as_float_tensor(
            make_spd_matrix(state_size, sample_shape=batch_shape, rng=rng)
        )

    mu = utils.expand_and_refine(nt.vector(mean), 1, n_batch=n_batch)
    sig = utils.expand_and_refine(nt.matrix(cov), 2, n_batch=n_batch)
    return GaussInit(mu=mu, sig=sig)
def random_spd_matrix(
    size: int,
    horizon: int,
    stationary: bool = False,
    n_batch: Optional[int] = None,
    rng: RNG = None,
) -> Tensor:
    """Sample random SPD matrices as a named float tensor.

    Args:
        size: number of rows (and columns) of each matrix
        horizon: length of the horizon
        stationary: forwarded to `minimal_sample_shape` when computing the
            leading sample dimensions
        n_batch: batch size, if any
        rng: random number generator, seed, or None

    Returns:
        The sampled matrices after `expand_and_refine` is applied with the
        given `horizon` and `n_batch`.
    """
    leading_shape = minimal_sample_shape(
        horizon, stationary=stationary, n_batch=n_batch
    )
    sample = make_spd_matrix(size, sample_shape=leading_shape, rng=rng)
    named = nt.matrix(as_float_tensor(sample))
    return expand_and_refine(named, 2, horizon=horizon, n_batch=n_batch)
def random_normal_vector(
    size: int,
    horizon: int,
    stationary: bool = False,
    n_batch: Optional[int] = None,
    rng: RNG = None,
) -> Tensor:
    """Sample vectors with i.i.d. standard Normal entries.

    Args:
        size: length of each vector
        horizon: length of the horizon
        stationary: forwarded to `minimal_sample_shape` when computing the
            leading sample dimensions
        n_batch: batch size, if any
        rng: random number generator, seed, or None

    Returns:
        The sampled vectors after `expand_and_refine` is applied with the
        given `horizon` and `n_batch`.
    """
    generator = np.random.default_rng(rng)
    leading_shape = minimal_sample_shape(
        horizon, stationary=stationary, n_batch=n_batch
    )
    sample = generator.normal(size=leading_shape + (size,))
    named = nt.vector(as_float_tensor(sample))
    return expand_and_refine(named, 1, horizon=horizon, n_batch=n_batch)
def random_uniform_matrix(
    row_size: int,
    col_size: int,
    horizon: int,
    stationary: bool = False,
    low: float = 0.0,
    high: float = 1.0,
    n_batch: Optional[int] = None,
    rng: RNG = None,
) -> Tensor:
    """Matrix with Uniform i.i.d. entries.

    Args:
        row_size: number of rows of each matrix
        col_size: number of columns of each matrix
        horizon: length of the horizon
        stationary: forwarded to `minimal_sample_shape` when computing the
            leading sample dimensions
        low: lower bound of the Uniform distribution
        high: upper bound of the Uniform distribution
        n_batch: batch size, if any
        rng: random number generator, seed, or None

    Returns:
        The sampled matrices after `expand_and_refine` is applied with the
        given `horizon` and `n_batch`.
    """
    # pylint:disable=too-many-arguments
    # Coerce seeds and None into a Generator; without this the default
    # `rng=None` would fail on the `.uniform` attribute lookup below.
    rng = np.random.default_rng(rng)
    mat_shape = (row_size, col_size)
    shape = (
        minimal_sample_shape(horizon, stationary=stationary, n_batch=n_batch)
        + mat_shape
    )
    mat = nt.matrix(as_float_tensor(rng.uniform(low=low, high=high, size=shape)))
    mat = expand_and_refine(mat, 2, horizon=horizon, n_batch=n_batch)
    return mat
def make_lindynamics(
    state_size: int,
    ctrl_size: int,
    horizon: int,
    stationary: bool = False,
    n_batch: Optional[int] = None,
    passive_eigval_range: Optional[tuple[float, float]] = None,
    controllable: bool = False,
    bias: bool = True,
    rng: RNG = None,
) -> LinDynamics:
    """Generate linear transition matrices.

    Args:
        state_size: size of state vector
        ctrl_size: size of control vector
        horizon: length of the horizon
        stationary: whether the dynamics are stationary (forwarded to the
            samplers; presumably time-invariant when True)
        n_batch: batch size, if any
        passive_eigval_range: range of eigenvalues for the unactuated system.
            If None, samples the F_s matrix entries independently from a
            standard normal distribution
        controllable: whether to ensure the actuator dynamics (the B matrix of
            the (A,B) pair) make the system controllable
        bias: whether to use a non-zero bias vector for transition dynamics
        rng: random number generator, seed, or None

    Returns:
        A ``LinDynamics`` built from the transition matrix (passive and
        active parts concatenated along the "C" dimension) and the bias
        vector.

    Raises:
        ValueError: if `controllable` is True but not `stationary`
    """
    # pylint:disable=too-many-arguments
    if controllable and not stationary:
        raise ValueError("Controllable non-stationary dynamics are unsupported.")

    # A single generator is shared by every sampler below so that a seed
    # determines the whole draw; the call order (passive, active, bias) is
    # therefore significant.
    generator = np.random.default_rng(rng)

    def _as_named_matrix(array):
        # Convert a raw array into a named matrix with horizon/batch dims.
        return utils.expand_and_refine(
            nt.matrix(as_float_tensor(array)), 2, horizon=horizon, n_batch=n_batch
        )

    passive, _, eigvec = generate_passive(
        state_size,
        eigval_range=passive_eigval_range,
        horizon=horizon,
        stationary=stationary,
        n_batch=n_batch,
        rng=generator,
    )
    active = generate_active(
        passive, ctrl_size, eigvec=eigvec, controllable=controllable, rng=generator
    )
    F = torch.cat((_as_named_matrix(passive), _as_named_matrix(active)), dim="C")

    if not bias:
        raw_bias = np.zeros(state_size)
    else:
        raw_bias = random_unit_vector(
            state_size,
            sample_shape=utils.minimal_sample_shape(horizon, stationary, n_batch),
            rng=generator,
        )
    f = utils.expand_and_refine(
        nt.vector(as_float_tensor(raw_bias)), 1, horizon=horizon, n_batch=n_batch
    )
    return LinDynamics(F, f)