def input(self, state: StateType, t: int) -> np.ndarray:
    if not self._solved:
        raise RuntimeError(
            'Need to call DiscretePolicy.solve() before asking for inputs.')
    if self._policy_type == 'trv':
        return self._K[:, :, t] @ state + self._h[:, t]
    else:
        # Sample the task-relevant variable, then map it to an input.
        return self._K[:, :, t] @ dists.GaussianDist(
            self._C[:, :, t] @ state + self._a[:, t],
            self._Sigma_eta[:, :, t]).sample() + self._h[:, t]
def marginal(self, prior: dists.GaussianDist) -> dists.GaussianDist:
    """
    Computes the distribution of Y resulting from a prior over X.

    :param prior: The assumed Gaussian prior on X.
    :return: The marginal Gaussian distribution of Y.
    """
    return dists.GaussianDist(
        self._A @ prior.mean() + self._b,
        self._A @ prior.cov() @ self._A.transpose() + self._cov)
def conditional(self, chan_input: np.ndarray) -> dists.GaussianDist:
    """
    Computes the distribution of Y assuming X = x.

    :param chan_input: The n-vector x.
    :return: The m-dimensional Gaussian distribution of Y given X = x.
    """
    mean = self._A @ chan_input.flatten() + self._b
    cov = self._cov
    return dists.GaussianDist(mean, cov)
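# Usage sketch: the channel models y = A x + b + v with v ~ N(0, cov),
# matching the formulas in marginal() and conditional() above. The demo
# names below are illustrative, not part of the API.
demo_channel = channels.LGChannel(np.array([[1.0, 0.0]]), np.zeros(1),
                                  np.eye(1))
demo_prior = dists.GaussianDist(np.zeros(2), np.eye(2))
demo_marginal = demo_channel.marginal(demo_prior)  # N(0, A A^T + cov) = N(0, 2)
demo_conditional = demo_channel.conditional(np.array([1.0, -1.0]))  # N(1, 1)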
def test_sample(self):
    np.random.seed(0)
    n = 100000
    dist1 = dists.GaussianDist(np.ones(1), 2 * np.eye(1))
    dist2 = dists.GaussianDist(np.zeros((3, 1)), np.eye(3))

    samples = dist1.sample(n)
    mean = samples.sum() / n
    var = (1 / n) * ((samples - mean) ** 2).sum()
    self.assertAlmostEqual(mean, 1, 2)
    self.assertAlmostEqual(var, 2, 1)

    samples = dist2.sample(n)
    mean = samples.sum(axis=1) / n
    deviation = samples - (mean.reshape((3, 1)) @ np.ones((1, n)))
    cov = (1 / (n - 1)) * (deviation @ deviation.transpose())
    self.assertTrue(np.allclose(mean, np.zeros((3, 1)), atol=0.01))
    self.assertTrue(np.allclose(cov, np.eye(3), atol=0.01))
def test_posterior(self):
    np.random.seed(0)
    # This example is taken from the end of:
    # http://web.stanford.edu/class/ee363/lectures/estim.pdf
    channel = channels.LGChannel(
        np.array([[np.cos(np.pi / 6), np.sin(np.pi / 6)]]), np.ones(1),
        np.eye(1))
    input = dists.GaussianDist(np.array([1, 1]),
                               np.array([[2**2, 0], [0, 0.5**2]]))
    sample_input = input.sample()
    conditional = channel.conditional(sample_input)
    sample_output = conditional.sample()
    posterior = channel.posterior(input, sample_output)
    self.assertTrue(
        np.allclose(posterior.cov(),
                    np.array([[1.046, -0.107], [-0.107, 0.246]]),
                    atol=0.01))

    # One more time with a different channel to test the mean.
    channel = channels.LGChannel(
        np.array([[np.cos(i * np.pi / 12), np.sin(i * np.pi / 12)]
                  for i in range(24)]), np.ones(24), np.eye(24))
    input = dists.GaussianDist(np.array([1, 1]),
                               np.array([[2**2, 0], [0, 0.5**2]]))
    sample_input = input.sample()
    conditional = channel.conditional(sample_input)
    sample_output = conditional.sample()
    posterior = channel.posterior(input, sample_output)
    self.assertTrue(
        np.allclose(posterior.mean(), sample_input.flatten(), atol=0.6))
def joint(self, chan_input: dists.GaussianDist) -> dists.GaussianDist:
    """
    Computes the joint Gaussian distribution for (X, Y).

    :param chan_input: The Gaussian distribution of X.
    :return: The Gaussian distribution of (X, Y) with n + m variables.
    """
    mean = self._A @ chan_input.mean() + self._b
    cov = self._A @ chan_input.cov() @ self._A.transpose() + self._cov
    # Cross-covariance blocks: Cov(X, Y) = Sigma_x A^T and Cov(Y, X) = A Sigma_x.
    return dists.GaussianDist(
        np.block([chan_input.mean(), mean]),
        np.block(
            [[chan_input.cov(), chan_input.cov() @ self._A.transpose()],
             [self._A @ chan_input.cov(), cov]]))
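# Consistency sketch for joint(): with A = [[1, 0]], b = 0, cov = I, the
# lower-right block of the joint covariance equals the marginal covariance
# of Y, and the lower-left block equals A Sigma_x. Demo names are
# illustrative only.
demo_channel_j = channels.LGChannel(np.array([[1.0, 0.0]]), np.zeros(1),
                                    np.eye(1))
demo_prior_j = dists.GaussianDist(np.zeros(2), np.eye(2))
demo_joint = demo_channel_j.joint(demo_prior_j)
assert np.allclose(demo_joint.cov()[2:, 2:],
                   demo_channel_j.marginal(demo_prior_j).cov())
assert np.allclose(demo_joint.cov()[2:, :2], np.array([[1.0, 0.0]]))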
def test_ilqr(self):
    g = np.zeros((4, 4))
    g[0, -1] = 3.2
    Qf = np.zeros((4, 4))
    Qf[0, 0] = 1
    s = Slip(init_dist=dists.GaussianDist(
                 np.array([0, 0.3927, -3.2733, -6.7881]), 1e-3 * np.eye(4)),
             horizon=3,
             proc_cov=1e-4 * np.diag(np.array([1, 0.1, 0.5, 0.5])),
             meas_cov=1e-4 * np.eye(4),
             Q=np.zeros((4, 4, 3)),
             g=g,
             R=10 * np.ones((1, 1, 3)),
             w=np.zeros((1, 3)),
             Qf=Qf)
    policy = ILQRPolicy(s)
    policy.solve(iters=5, verbose=True)
    self.assertAlmostEqual(policy._state_traj[0, -1], g[0, -1], places=3)
def posterior(self, prior: dists.GaussianDist,
              output: np.ndarray) -> dists.GaussianDist:
    """
    Computes the posterior distribution over X given Y = y.

    Reference: https://web.stanford.edu/class/ee363/lectures/estim.pdf

    :param prior: The assumed Gaussian prior distribution over X.
    :param output: An m-vector representing the observed value of y.
    :return: The Gaussian posterior distribution over X.
    """
    B = prior.cov() @ self._A.transpose() @ np.linalg.inv(
        self._A @ prior.cov() @ self._A.transpose() + self._cov)
    output_mean = self._A @ prior.mean() + self._b
    return dists.GaussianDist(
        prior.mean() + B @ (output.flatten() - output_mean),
        np.linalg.inv(
            self._A.transpose() @ np.linalg.inv(self._cov) @ self._A +
            np.linalg.inv(prior.cov())))
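# Posterior sketch: for the same scalar-output channel used in the sketches
# above, conditioning on y = 2 gives covariance
# inv(A^T cov^{-1} A + Sigma_x^{-1}) = diag(0.5, 1), which is never larger
# than the prior covariance. Demo names are illustrative only.
demo_channel_p = channels.LGChannel(np.array([[1.0, 0.0]]), np.zeros(1),
                                    np.eye(1))
demo_prior_p = dists.GaussianDist(np.zeros(2), np.eye(2))
demo_post = demo_channel_p.posterior(demo_prior_p, np.array([2.0]))
assert np.allclose(demo_post.cov(), np.diag([0.5, 1.0]))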
def solve(self,
          tradeoff: float,
          iters: int = 10,
          initial_inputs: Union[None, np.ndarray] = None,
          init_K: Union[None, np.ndarray] = None,
          init_h: Union[None, np.ndarray] = None,
          init_verbose: bool = False,
          relinearize_every: int = 5):
    A = np.zeros((self._problem.n_states, self._problem.n_states, self._problem.horizon))
    B = np.zeros((self._problem.n_states, self._problem.n_inputs, self._problem.horizon))
    C = np.dstack([np.eye(self._trv_size, self._problem.n_states)] * self._problem.horizon)
    a = np.zeros((self._trv_size, self._problem.horizon))
    Sigma_eta = np.zeros((self._trv_size, self._trv_size, self._problem.horizon))
    for t in range(self._problem.horizon):
        # Random symmetric positive-semidefinite initialization of the TRV noise.
        Sigma_eta[:, :, t] = 0.01 * np.random.rand(self._trv_size, self._trv_size)
        Sigma_eta[:, :, t] = Sigma_eta[:, :, t] @ Sigma_eta[:, :, t].transpose()

    if init_K is None:
        K = np.zeros((self._problem.n_inputs, self._trv_size, self._problem.horizon))
    else:
        K = init_K.copy()
    if init_h is None:
        h = np.zeros((self._problem.n_inputs, self._problem.horizon))
    else:
        h = init_h.copy()

    Q = self._problem._Q
    R = self._problem._R
    P = np.zeros((self._problem.n_states, self._problem.n_states, self._problem.horizon + 1))
    b = np.zeros((self._problem.n_states, self._problem.horizon + 1))

    delta_states = [dists.GaussianDist(np.zeros(self._problem.n_states),
                                       np.zeros((self._problem.n_states, self._problem.n_states)))
                    for i in range(self._problem.horizon + 1)]
    delta_inputs = [dists.GaussianDist(np.zeros(self._problem.n_inputs),
                                       np.zeros((self._problem.n_inputs, self._problem.n_inputs)))
                    for i in range(self._problem.horizon)]

    nominal_states = np.zeros((self._problem.n_states, self._problem.horizon + 1))
    nominal_states[:, 0] = self._problem.init_dist.mean()
    if initial_inputs is not None:
        nominal_inputs = initial_inputs.copy()
    else:
        nominal_inputs = np.zeros((self._problem.n_inputs, self._problem.horizon))
    for t in range(self._problem.horizon):
        nominal_states[:, t + 1] = self._problem.dynamics(nominal_states[:, t], nominal_inputs[:, t], t).mean()

    relinearize = False
    obj_val = np.inf
    obj_hist = np.zeros(iters)
    mi_total = 0
    expected_cost_total = 0
    best_expected_cost = np.inf
    best_mi = np.inf
    for iter in range(iters):
        # Forward pass: propagate the perturbation distributions through the
        # linearized dynamics under the current policy.
        expected_cost_total = 0
        mi_total = 0
        for t in range(self._problem.horizon):
            delta_inputs[t] = dists.GaussianDist(
                K[:, :, t] @ (C[:, :, t] @ delta_states[t].mean() + a[:, t]) + h[:, t],
                K[:, :, t] @ (C[:, :, t] @ delta_states[t].cov() @ C[:, :, t].transpose() + Sigma_eta[:, :, t]) @ K[:, :, t].transpose())
            A[:, :, t], B[:, :, t] = self._problem.linearize_dynamics(nominal_states[:, t], nominal_inputs[:, t], t)
            # PEP8? What's that? These bits are far more readable as long lines.
            delta_states[t + 1] = dists.GaussianDist(
                (self._problem.dynamics(nominal_states[:, t] + delta_states[t].mean(), nominal_inputs[:, t] + delta_inputs[t].mean(), t) - nominal_states[:, t + 1]).mean(),
                (A[:, :, t] + B[:, :, t] @ K[:, :, t] @ C[:, :, t]) @ delta_states[t].cov() @ (A[:, :, t] + B[:, :, t] @ K[:, :, t] @ C[:, :, t]).transpose()
                + (B[:, :, t] @ K[:, :, t]) @ Sigma_eta[:, :, t] @ (B[:, :, t] @ K[:, :, t]).transpose()
                + self._problem._proc_cov)
            expected_cost_total += self._problem.cost(nominal_states[:, t] + delta_states[t].mean(), nominal_inputs[:, t] + delta_inputs[t].mean(), t)
            # TODO: Test mutual info computation.
            mi_total += channels.LGChannel(C[:, :, t], a[:, t], Sigma_eta[:, :, t]).mutual_info(delta_states[t])

        obj_hist[iter] = expected_cost_total + (1 / tradeoff) * mi_total
        if obj_hist[iter] < obj_val:
            # Record the best policy found so far.
            obj_val = obj_hist[iter]
            self._C = C.copy()
            self._a = a.copy()
            self._K = K.copy()
            self._Sigma_eta = Sigma_eta.copy()
            self._h = h.copy()
            self._nominal_inputs = nominal_inputs
            self._nominal_states = nominal_states
            self._A = A
            self._B = B
            for t in range(self._problem.horizon + 1):
                self._delta_states[t] = delta_states[t]
            relinearize = True

        if iter % relinearize_every == 0 and relinearize:
            relinearize = False
            for t in range(self._problem.horizon):
                nominal_inputs[:, t] = nominal_inputs[:, t] + delta_inputs[t].mean()
                nominal_states[:, t + 1] = self._problem.dynamics(nominal_states[:, t], nominal_inputs[:, t], t).mean()
            continue

        delta_g = self._problem._g - nominal_states
        delta_w = self._problem._w - nominal_inputs
        # Terminal value function terms from 0.5 * (x - g)^T Qf (x - g).
        P[:, :, -1] = self._problem._Qf
        b[:, -1] = -self._problem._Qf @ delta_g[:, -1]

        # Here be dragons...
        # Backward pass over t = horizon - 1, ..., 0.
        for t in range(self._problem.horizon - 1, -1, -1):
            # TRV-given-state map:
            Sigma_eta[:, :, t] = np.linalg.inv(
                tradeoff * K[:, :, t].transpose() @ (B[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] + R[:, :, t]) @ K[:, :, t]
                + np.linalg.inv(C[:, :, t] @ delta_states[t].cov() @ C[:, :, t].transpose() + Sigma_eta[:, :, t]))
            F = np.linalg.inv(C[:, :, t] @ delta_states[t].cov() @ C[:, :, t].transpose() + Sigma_eta[:, :, t])
            C[:, :, t] = -tradeoff * Sigma_eta[:, :, t] @ K[:, :, t].transpose() @ B[:, :, t].transpose() @ P[:, :, t + 1] @ A[:, :, t]
            a[:, t] = -Sigma_eta[:, :, t] @ (
                tradeoff * K[:, :, t].transpose() @ B[:, :, t].transpose() @ (b[:, t + 1] + P[:, :, t + 1] @ B[:, :, t] @ h[:, t])
                + tradeoff * K[:, :, t].transpose() @ R[:, :, t] @ (h[:, t] - delta_w[:, t])
                - F @ (C[:, :, t] @ delta_states[t].mean() + a[:, t]))

            # Input-given-TRV map:
            # First some shorthand.
            x_bar = delta_states[t].mean()
            Sigma_x = delta_states[t].cov()
            x_tilde_bar = C[:, :, t] @ x_bar + a[:, t]
            Sigma_x_tilde = C[:, :, t] @ Sigma_x @ C[:, :, t].transpose() + Sigma_eta[:, :, t]
            cpK = cp.Variable((self._problem.n_inputs, self._trv_size))
            cph = cp.Variable(self._problem.n_inputs)
            objective = (
                0.5 * (cpK @ x_tilde_bar + cph - delta_w[:, t]).T @ R[:, :, t] @ (cpK @ x_tilde_bar + cph - delta_w[:, t])
                + 0.5 * cp.trace(cpK.T @ R[:, :, t] @ cpK @ Sigma_x_tilde)
                + 0.5 * x_bar @ A[:, :, t].transpose() @ P[:, :, t + 1] @ A[:, :, t] @ x_bar
                + 0.5 * x_bar.transpose() @ (A[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] @ cpK @ C[:, :, t] + C[:, :, t].transpose() @ cpK.T @ B[:, :, t].transpose() @ P[:, :, t + 1] @ A[:, :, t]) @ x_bar
                + x_bar.transpose() @ A[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] @ cpK @ a[:, t]
                + x_bar.transpose() @ A[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] @ cph
                + 0.5 * x_tilde_bar.transpose() @ cpK.T @ B[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] @ cpK @ x_tilde_bar
                + x_tilde_bar.transpose() @ cpK.T @ B[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] @ cph
                + 0.5 * cph.T @ B[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] @ cph
                + b[:, t + 1].transpose() @ (A[:, :, t] @ x_bar + B[:, :, t] @ cpK @ x_tilde_bar + B[:, :, t] @ cph)
                + 0.5 * cp.trace(Sigma_x @ A[:, :, t].transpose() @ P[:, :, t + 1] @ A[:, :, t]
                                 + Sigma_x @ (A[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] @ cpK @ C[:, :, t] + C[:, :, t].transpose() @ cpK.T @ B[:, :, t].transpose() @ P[:, :, t + 1] @ A[:, :, t]))
                + cp.trace(Sigma_x_tilde @ cpK.T @ B[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] @ cpK))
            prob = cp.Problem(cp.Minimize(objective), [])
            prob.solve(solver=cp.MOSEK)
            K[:, :, t] = cpK.value.copy()
            h[:, t] = cph.value.copy()

            # Value function:
            G = C[:, :, t].transpose() @ F @ C[:, :, t]
            P[:, :, t] = (Q[:, :, t] + (1 / tradeoff) * G
                          + C[:, :, t].transpose() @ K[:, :, t].transpose() @ R[:, :, t] @ K[:, :, t] @ C[:, :, t]
                          + (A[:, :, t] + B[:, :, t] @ K[:, :, t] @ C[:, :, t]).transpose() @ P[:, :, t + 1] @ (A[:, :, t] + B[:, :, t] @ K[:, :, t] @ C[:, :, t]))
            b[:, t] = ((A[:, :, t] + B[:, :, t] @ K[:, :, t] @ C[:, :, t]).transpose() @ P[:, :, t + 1] @ B[:, :, t] @ K[:, :, t] @ a[:, t]
                       - Q[:, :, t] @ delta_g[:, t]
                       - (1 / tradeoff) * G @ delta_states[t].mean()
                       + C[:, :, t].transpose() @ K[:, :, t].transpose() @ R[:, :, t] @ K[:, :, t] @ a[:, t]
                       + (A[:, :, t] + B[:, :, t] @ K[:, :, t] @ C[:, :, t]).transpose() @ b[:, t + 1]
                       + C[:, :, t].transpose() @ K[:, :, t].transpose() @ R[:, :, t] @ h[:, t]
                       - C[:, :, t].transpose() @ K[:, :, t].transpose() @ R[:, :, t] @ delta_w[:, t]
                       + A[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] @ h[:, t]
                       + C[:, :, t].transpose() @ K[:, :, t].transpose() @ B[:, :, t].transpose() @ P[:, :, t + 1] @ B[:, :, t] @ h[:, t])

    # Mark the policy as solved so that input() can be called.
    self._solved = True
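# Usage sketch for the policy (assumptions: `problem` is a problem instance
# such as the Slip system below, and the class is the DiscretePolicy whose
# input() method appears above; the constructor call is hypothetical):
#
#   policy = DiscretePolicy(problem)   # hypothetical signature
#   policy.solve(tradeoff=100.0, iters=10)
#   u0 = policy.input(problem.init_dist.mean(), 0)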
def sensor(self, state: StateType, t: int) -> dists.Distribution:
    return dists.GaussianDist(state, self._meas_cov)
def dynamics(self, state: StateType, input: InputType,
             t: int) -> dists.Distribution:
    return dists.GaussianDist(slip_return_map(state, input, self),
                              self._proc_cov)
def sensor(self, state: np.ndarray, t: int) -> dists.GaussianDist:
    return dists.GaussianDist(self._C[:, :, t] @ state, self._meas_cov)
def dynamics(self, state: np.ndarray, input: np.ndarray,
             t: int) -> dists.GaussianDist:
    return dists.GaussianDist(
        self._A[:, :, t] @ state + self._B[:, :, t] @ input,
        self._proc_cov)
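# Rollout sketch for the linear-Gaussian model above:
# x_{t+1} = A_t x_t + B_t u_t + w_t with w_t ~ N(0, proc_cov). The demo
# arrays stand in for the class attributes and are illustrative only.
demo_A = np.dstack([np.eye(2)] * 3)
demo_B = np.dstack([np.ones((2, 1))] * 3)
demo_proc_cov = 0.01 * np.eye(2)
demo_x = np.zeros(2)
for demo_t in range(3):
    demo_u = np.array([0.1])
    demo_step = dists.GaussianDist(
        demo_A[:, :, demo_t] @ demo_x + demo_B[:, :, demo_t] @ demo_u,
        demo_proc_cov)
    demo_x = demo_step.sample().flatten()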