def test_fitted_value_iteration_pendulum(self): plant = PendulumPlant() simulator = Simulator(plant) def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi u = plant.EvalVectorInput(context, 0).CopyToVector() return x.dot(x) + u.dot(u) qbins = np.linspace(0., 2. * math.pi, 51) qdotbins = np.linspace(-10., 10., 41) state_grid = [set(qbins), set(qdotbins)] input_limit = 2. input_mesh = [set(np.linspace(-input_limit, input_limit, 9))] timestep = 0.01 [Q, Qdot] = np.meshgrid(qbins, qdotbins) fig = plt.figure() ax = fig.gca(projection='3d') def draw(iteration, mesh, cost_to_go, policy): # Drawing is slow, don't draw every frame. if iteration % 20 != 0: return plt.title("iteration " + str(iteration)) J = np.reshape(cost_to_go, Q.shape) surf = ax.plot_surface(Q, Qdot, J, rstride=1, cstride=1, cmap=color_map.jet) plt.pause(0.00001) surf.remove() options = DynamicProgrammingOptions() options.convergence_tol = 1. options.state_indices_with_periodic_boundary_conditions = {0} options.visualization_callback = draw policy, cost_to_go = FittedValueIteration(simulator, quadratic_regulator_cost, state_grid, input_mesh, timestep, options)
def test_fitted_value_iteration_pendulum(self): plant = PendulumPlant() simulator = Simulator(plant) def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi u = plant.EvalVectorInput(context, 0).CopyToVector() return x.dot(x) + u.dot(u) qbins = np.linspace(0., 2.*math.pi, 51) qdotbins = np.linspace(-10., 10., 41) state_grid = [set(qbins), set(qdotbins)] input_limit = 2. input_mesh = [set(np.linspace(-input_limit, input_limit, 9))] timestep = 0.01 [Q, Qdot] = np.meshgrid(qbins, qdotbins) fig = plt.figure() ax = fig.gca(projection='3d') def draw(iteration, mesh, cost_to_go, policy): # Drawing is slow, don't draw every frame. if iteration % 20 != 0: return plt.title("iteration " + str(iteration)) J = np.reshape(cost_to_go, Q.shape) surf = ax.plot_surface(Q, Qdot, J, rstride=1, cstride=1, cmap=color_map.jet) plt.pause(0.00001) surf.remove() options = DynamicProgrammingOptions() options.convergence_tol = 1. options.state_indices_with_periodic_boundary_conditions = {0} options.visualization_callback = draw policy, cost_to_go = FittedValueIteration(simulator, quadratic_regulator_cost, state_grid, input_mesh, timestep, options)
def test_fitted_value_iteration_pendulum(self): plant = PendulumPlant() simulator = Simulator(plant) def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi u = plant.EvalVectorInput(context, 0).CopyToVector() return x.dot(x) + u.dot(u) # Note: intentionally under-sampled to keep the problem small qbins = np.linspace(0., 2.*math.pi, 11) qdotbins = np.linspace(-10., 10., 11) state_grid = [set(qbins), set(qdotbins)] input_limit = 2. input_mesh = [set(np.linspace(-input_limit, input_limit, 5))] timestep = 0.01 num_callbacks = [0] def callback(iteration, mesh, cost_to_go, policy): # Drawing is slow, don't draw every frame. num_callbacks[0] += 1 options = DynamicProgrammingOptions() options.convergence_tol = 1. options.periodic_boundary_conditions = [ PeriodicBoundaryCondition(0, 0., 2.*math.pi) ] options.visualization_callback = callback options.input_port_index = InputPortSelection.kUseFirstInputIfItExists options.assume_non_continuous_states_are_fixed = False policy, cost_to_go = FittedValueIteration(simulator, quadratic_regulator_cost, state_grid, input_mesh, timestep, options) self.assertGreater(num_callbacks[0], 0)
def test_fitted_value_iteration_pendulum(self): plant = PendulumPlant() simulator = Simulator(plant) def quadratic_regulator_cost(context): x = context.get_continuous_state_vector().CopyToVector() x[0] = x[0] - math.pi u = plant.EvalVectorInput(context, 0).CopyToVector() return x.dot(x) + u.dot(u) # Note: intentionally under-sampled to keep the problem small qbins = np.linspace(0., 2.*math.pi, 11) qdotbins = np.linspace(-10., 10., 11) state_grid = [set(qbins), set(qdotbins)] input_limit = 2. input_mesh = [set(np.linspace(-input_limit, input_limit, 5))] timestep = 0.01 num_callbacks = [0] def callback(iteration, mesh, cost_to_go, policy): # Drawing is slow, don't draw every frame. num_callbacks[0] += 1 options = DynamicProgrammingOptions() options.convergence_tol = 1. options.periodic_boundary_conditions = [ PeriodicBoundaryCondition(0, 0., 2.*math.pi) ] options.visualization_callback = callback policy, cost_to_go = FittedValueIteration(simulator, quadratic_regulator_cost, state_grid, input_mesh, timestep, options) self.assertGreater(num_callbacks[0], 0)
plt.title("iteration " + str(iteration)) J = np.reshape(cost_to_go, Q.shape) surf = ax.plot_surface(Q, Qdot, J, rstride=1, cstride=1, cmap=cm.jet) Pi = np.reshape(policy, Q.shape) surf2 = ax2.plot_surface(Q, Qdot, Pi, rstride=1, cstride=1, cmap=cm.jet) if plt.get_backend() != u"template": plt.draw_all() plt.pause(0.00001) surf.remove() surf2.remove() options.visualization_callback = draw policy, cost_to_go = FittedValueIteration(simulator, cost_function, state_grid, input_grid, timestep, options) J = np.reshape(cost_to_go, Q.shape) surf = ax.plot_surface(Q, Qdot, J, rstride=1, cstride=1, cmap=cm.jet) Pi = np.reshape(policy.get_output_values(), Q.shape) surf = ax2.plot_surface(Q, Qdot, Pi, rstride=1, cstride=1, cmap=cm.jet) # animate the resulting policy. builder = DiagramBuilder() plant = builder.AddSystem(DoubleIntegrator()) logger = LogOutput(plant.get_output_port(0), builder) vi_policy = builder.AddSystem(policy) builder.Connect(vi_policy.get_output_port(0), plant.get_input_port(0))
J = np.reshape(cost_to_go, Q.shape) surf = ax.plot_surface(Q, Qdot, J, rstride=1, cstride=1, cmap=cm.jet) Pi = np.reshape(policy, Q.shape) surf2 = ax2.plot_surface(Q, Qdot, Pi, rstride=1, cstride=1, cmap=cm.jet) if plt.get_backend() != u'template': plt.draw_all() plt.pause(0.00001) surf.remove() surf2.remove() options.visualization_callback = draw policy, cost_to_go = FittedValueIteration(simulator, cost_function, state_grid, input_grid, timestep, options) J = np.reshape(cost_to_go, Q.shape) surf = ax.plot_surface(Q, Qdot, J, rstride=1, cstride=1, cmap=cm.jet) Pi = np.reshape(policy.get_output_values(), Q.shape) surf = ax2.plot_surface(Q, Qdot, Pi, rstride=1, cstride=1, cmap=cm.jet) # Animate the resulting policy. builder = DiagramBuilder()