A = np.array([[0.7, 0.3, 0.2], [-0.2, 0.4, 0.5], [-0.4, 0.2, -0.3]]) B = np.array([[0.5, -0.3], [0.8, 0.3], [0.1, 0.9]]) Q = np.eye(3) R = np.eye(2) S0 = np.eye(3) Aa = 0.1 * np.array([[2, 9, -6], [9, 9, 4], [-9, -2, 5]]) Aa = Aa[:, :, np.newaxis] Bb = 0.1 * np.array([[8, 8], [3, 3], [-6, 6]]) Bb = Bb[:, :, np.newaxis] a = np.array([[0.1]]) b = np.array([[0.1]]) SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0) # Start with an initially stabilizing (feasible) controller; # for this example the system is open-loop mean-square stable SS.setK(np.zeros([SS.m, SS.n])) # Policy gradient options PGO = PolicyGradientOptions(epsilon=(1e-2) * SS.Kare.size, eta=1e-3, max_iters=1000, disp_stride=1, keep_hist=True, opt_method='proximal', keep_opt='last', step_direction='gradient', stepsize_method='constant', exact=True, regularizer=Regularizer('vec1'), regweight=1.0, stop_crit='gradient',
def gradient_estimate_variance(noise, textfile, seed=1): npr.seed(seed) # Generate the system # Two states, diffusion w/ friction and multiplicative noise n = 2 m = 1 A = np.array([[0.8, 0.1], [0.1, 0.8]]) B = np.array([[1.0], [0.0]]) a = np.array([[0.1]]) Aa = np.array([[0.0, 1.0], [1.0, 0.0]])[:, :, np.newaxis] b = np.array([[0.0]]) Bb = np.array([[0.0], [0.0]])[:, :, np.newaxis] Q = np.eye(2) R = np.eye(1) S0 = np.eye(2) if noise: SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0) else: SS = LQRSys(A, B, Q, R, S0) # Initialize # K0 = 0.01*np.ones([m,n]) K0 = np.zeros([m, n]) SS.setK(K0) K = np.copy(SS.K) print(SS.c) # Number of gradient estimates to collect for variance analysis n_iterc = 10 # Rollout length nt = 40 # Number of rollouts nr = int(1e4) # Exploration radius ru = 1e-2 G_est_all = np.zeros([n_iterc, m, n]) error_angle_all = np.zeros(n_iterc) error_scale_all = np.zeros(n_iterc) error_norm_all = np.zeros(n_iterc) headerstr_list = [] headerstr_list.append(' trial ') headerstr_list.append('error angle (deg)') headerstr_list.append(' error scale ') headerstr_list.append(' error norm') headerstr_list.append('true gradient norm') headerstr = " | ".join(headerstr_list) printout(headerstr, textfile) t_start = time() for iterc in range(n_iterc): # Estimate gradient using zeroth-order optimization # Draw random gain deviations and scale to Frobenius norm ball Uraw = npr.normal(size=[nr, SS.m, SS.n]) U = ru * Uraw / la.norm(Uraw, 'fro', axis=(1, 2))[:, None, None] # Stack dynamics matrices into a 3D array Kd = K + U # Simulate all rollouts together c = np.zeros(nr) # Draw random initial states x = npr.multivariate_normal(np.zeros(SS.n), SS.S0, nr) for t in range(nt): # Accumulate cost c += np.einsum('...i,...i', x, np.einsum('jk,...k', SS.QK, x)) # Calculate closed-loop dynamics AKr = SS.A + np.einsum('...ik,...kj', SS.B, Kd) if noise: for i in range(SS.p): AKr += (SS.a[i]**0.5) * npr.randn( nr)[:, np.newaxis, np.newaxis] * np.repeat( SS.Aa[np.newaxis, :, :, i], nr, axis=0) for j in range(SS.q): AKr += np.einsum( '...ik,...kj', (SS.b[j]**0.5) * npr.randn(nr)[:, np.newaxis, np.newaxis] * np.repeat(SS.Bb[np.newaxis, :, :, j], nr, axis=0), Kd) # Transition the state x = np.einsum('...jk,...k', AKr, x) # Estimate gradient Glqr = np.einsum('i,i...', c, U) Glqr *= K.size / (nr * (ru**2)) G_est = Glqr G_act = SS.grad error_angle = (360 / (2 * np.pi)) * np.arccos( np.sum((G_est * G_act)) / (la.norm(G_est) * la.norm(G_act))) error_scale = (la.norm(G_est) / la.norm(G_act)) error_norm = la.norm(G_est - G_act) G_est_all[iterc] = G_est error_angle_all[iterc] = error_angle error_scale_all[iterc] = error_scale error_norm_all[iterc] = error_norm # Print iterate messages printstrlist = [] printstrlist.append("{0:9d}".format(iterc + 1)) printstrlist.append(" {0:6.2f} / 360".format(error_angle)) printstrlist.append("{0:8.4f} / 1".format(error_scale)) printstrlist.append("{0:9.4f}".format(error_norm)) printstrlist.append("{0:9.4f}".format(la.norm(G_act))) printstr = ' | '.join(printstrlist) printout(printstr, textfile) t_end = time() printout('', textfile) printout('mean of error angle', textfile) printout('%f' % np.mean(error_angle_all), textfile) printout('mean of error scale', textfile) printout('%f' % np.mean(error_scale_all), textfile) printout('mean of error norm', textfile) printout('%f' % np.mean(error_norm_all), textfile) # printout('standard deviation of raw gradient estimate, entrywise',textfile) # printout('%f' % np.std(G_est_all,0),textfile) printout('average time per gradient estimate (s)', textfile) printout("%.3f" % ((t_end - t_start) / n_iterc), textfile) printout('', textfile) return G_act, G_est_all, error_angle_all, error_scale_all, error_norm_all