def cartpole():
    model = InvertedPendulum()
    observation_space = 4
    action_space = 1
    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0
    while True:
        run += 1
        state = model.state
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = model.step_rl(action)
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step))
                break
            dqn_solver.experience_replay()
示例#2
0
from src.InvertedPendulum import *
from src.Simulator import *
from src.controllers.PolePlacementController import *

# This script shows the behavior of the pendulum controlled by a pole placement controller

if __name__ == "__main__":

    # Import model
    model = InvertedPendulum()

    # Set initial state [m, m/s, rad, rad/s]
    model.set_state(np.mat([[-0.3], [0.0], [0.1], [0.0]]))

    # Define controller
    controller = Control(model)

    # Set desired cart position (m)
    controller.set_desired_position(0.3)

    # Simulator
    sim = Simulator(model, controller)
    t, state_list = sim.simulate()

    # Plot data
    plt.figure()
    plt.subplot(2, 2, 1)
    plt.plot(t, state_list[:, 0])
    plt.xlabel("Time (s)")
    plt.ylabel("x (m)")
    plt.title("Cart position")
示例#3
0
from src.InvertedPendulum import *
import casadi as ca
import matplotlib.pyplot as plt

# Trajectory parameters
T = 2
N = 25
hk = T / N
umax = 20
dmax = 2
d = 1

# Import pendulum model
model = InvertedPendulum()

# Initial state
x0 = [0, 0, 0, 0]
xd = [0, 0, -pi / 4, 0]

# Define decision variables
u = ca.SX.sym('u', 1, N)
x = ca.SX.sym('x', 4, N)
w = ca.vertcat(u, x)

# Define objective
cost = 0
for k in range(N - 1):
    cost += (hk / 2) * (u[:, k]**2 + u[:, k + 1]**2)

# Create collocation constraints
constr = []
    ax1.grid()

    # Trajectory parameters
    T = 2
    N = 25
    hk = T / N
    umax = 20
    dmax = 2
    d = 1

    # Initial state
    x0 = [0, 0, -pi / 4, 0]
    xd = [1, 0, 0, 0]

    # Import pendulum model
    model = InvertedPendulum()
    model.set_state(np.reshape(x0, (4, 1)))
    model.dt = hk

    # Define decision variables
    u = cvxpy.Variable((1, N))
    x = cvxpy.Variable((4, N))

    # Define objective
    cost = 0
    for k in range(N - 1):
        cost += (hk / 2) * (u[:, k]**2 + u[:, k + 1]**2)
    objective = cvxpy.Minimize(cost)

    # Create collocation constraints
    constr = []