Example #1
def main():
    """
    Train and evaluate agent.

    This function does essentially the same thing as the checker that evaluates your agent.
    You can use it for debugging your agent and visualizing what it does.
    """
    import numpy as np
    import torch

    from lunar_lander import LunarLander
    from gym.wrappers.monitoring.video_recorder import VideoRecorder

    env = LunarLander()

    agent = Agent(env)
    agent.train()

    rec = VideoRecorder(env, "policy.mp4")
    episode_length = 300
    n_eval = 100
    returns = []
    print("Evaluating agent...")

    for i in range(n_eval):
        print(f"Testing policy: episode {i+1}/{n_eval}")
        state = env.reset()
        cumulative_return = 0
        # The environment sets terminal to True when an episode is done.
        terminal = False
        for t in range(episode_length):
            # if i <= 10:
            #     rec.capture_frame()
            # Taking an action in the environment
            action = agent.get_action(
                torch.as_tensor(state, dtype=torch.float32))
            state, reward, terminal = env.transition(action)
            cumulative_return += reward
            if terminal:
                break
        returns.append(cumulative_return)
        print(f"Achieved {cumulative_return:.2f} return.")
        # if i == 10:
        #     rec.close()
        #     print("Saved video of 10 episodes to 'policy.mp4'.")
    env.close()
    print(f"Average return: {np.mean(returns):.2f}")
Example #2
        "action": [],
        "terminal": [],
    }

    env = LunarLander()
    env.render()
    env.viewer.window.on_key_press = key_press
    env.viewer.window.on_key_release = key_release

    a = np.array([0])

    episode_rewards = []
    steps = 0
    while True:
        episode_reward = 0
        state = env.reset()
        state_img = env.render(
            mode="rgb_array")[::4, ::4, :]  # downsampling (every 4th pixel).

        while True:

            next_state, r, done, info = env.step(a[0])
            next_state_img = env.render(mode="rgb_array")[::4, ::4, :]

            episode_reward += r

            samples["state"].append(state)  # state has shape (8,)
            samples["state_img"].append(
                state_img)  # state_img has shape (100, 150, 3)
            samples["action"].append(np.array(a))
            samples["next_state"].append(next_state)
Example #3
def plot_io_bounds(x, y, vx, vy, theta, omega, a, steps, discrete=True):
    import matplotlib.pyplot as plt
    import numpy as np

    statebox = [x, y, vx, vy, theta, omega]
    centerstate = [box[0] + .5 * (box[1] - box[0]) for box in statebox]
    envstate = list(centerstate)  # separate copy used for the gym rollout

    # Zero order hold on actions if needed
    if discrete and isinstance(a, int):
        a = a * np.ones(steps, dtype=np.int32)
    elif not discrete:
        a = [np.array(a) for i in range(steps)]

    # System-identified model trajectory
    centerstatehist = [centerstate]
    for i in range(steps):
        centerstate = lander_dynamics(*centerstate, a=a[i], discrete=discrete)
        centerstatehist.append(centerstate)

    # Actual OpenAI Gym model trajectory
    envstatehist = [envstate]
    if discrete:
        from lunar_lander import LunarLander
        env = LunarLander()
    else:
        from lunar_lander import LunarLanderContinuous
        env = LunarLanderContinuous()
    s = env.reset(envstate)
    for i in range(steps):
        s, _, _, _ = env.step(a[i])
        envstatehist.append(s[0:6])

    # Overapproximated trajectory
    stateboxhist = [statebox]
    for i in range(steps):
        statebox = lander_box_dynamics(*statebox,
                                       a=a[i],
                                       steps=1,
                                       discrete=discrete)
        stateboxhist.append(statebox)

    centerstatehist = np.array(centerstatehist)
    envstatehist = np.array(envstatehist)
    stateboxhist = np.array(stateboxhist)

    t = np.linspace(0, steps, steps + 1)
    fig, axs = plt.subplots(6, 1, figsize=(4, 9))

    # fig.set_size_inches(5,7,forward=True)

    limits = [[-1, 1], [0, 1], [-1, 1], [-1, 1], [-np.pi / 3, np.pi / 3],
              [-.5, .5]]
    for i in range(6):
        axs[i].fill_between(t,
                            stateboxhist[:, i, 0],
                            stateboxhist[:, i, 1],
                            alpha=0.3)
        axs[i].plot(centerstatehist[:, i], 'r')
        axs[i].plot(envstatehist[:, i], 'b.')
        axs[i].set_ylim(bottom=limits[i][0], top=limits[i][1])
        axs[i].set_yticks(np.linspace(limits[i][0], limits[i][1], 17),
                          minor=True)
        axs[i].grid(which='minor', alpha=.4)

    axs[0].set_title('Action {0}'.format(a))
    plt.show()
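
lander_dynamics and lander_box_dynamics are project-specific helpers that are not part of this excerpt; only their signatures and output shapes can be inferred from how plot_io_bounds calls them. A sketch of that inferred interface with placeholder bodies (not the project's real dynamics):

def lander_dynamics(x, y, vx, vy, theta, omega, a, discrete=True):
    # Expected to return the next center state as a 6-element sequence
    # [x, y, vx, vy, theta, omega]; identity dynamics as a placeholder.
    return [x, y, vx, vy, theta, omega]


def lander_box_dynamics(x, y, vx, vy, theta, omega, a, steps=1, discrete=True):
    # Each argument is an interval [lo, hi]; the result must again be six
    # intervals so that np.array(stateboxhist) has shape (steps + 1, 6, 2),
    # matching the fill_between indexing above. Pass-through placeholder.
    return [x, y, vx, vy, theta, omega]
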
Example #4
File: land.py  Project: imandr/KeRLas
from lunar_lander import LunarLander, FPS
import random, time, getopt
import numpy as np

np.set_printoptions(precision=3, suppress=True)

if __name__ == "__main__":

    env = LunarLander()
    dt = 1.0 / FPS

    obs = env.reset()
    env.render()
    done = False
    t = 0
    t0 = time.time()
    while not done and t < 500:
        a = random.randint(0, 3)
        s1, r, done, info = env.step(a)
        print(s1, r, info)
        #time.sleep(dt*10)
        env.render()
        t += 1
    print("rate:", t / (time.time() - t0))
Example #5
from lunar_lander import demo_heuristic_lander, LunarLander

total_reward_array = []

myLunarLander = LunarLander()

dorender = True
num_iters = 100
isdumb = True

for i in range(0, num_iters):
    end_reward = demo_heuristic_lander(myLunarLander,
                                       render=dorender,
                                       dumb=isdumb)
    total_reward_array.append(end_reward)
    myLunarLander.reset()
    print("Iteration: " + str(i))

print("Average Rewards Over " + str(num_iters) + " trials ")
average_reward = sum(total_reward_array) / len(total_reward_array)
print(average_reward)
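
For a slightly fuller summary of the same reward list, the script could report spread as well as the mean with NumPy (an optional addition, not in the original):

import numpy as np

rewards = np.asarray(total_reward_array, dtype=float)
print("mean:", rewards.mean(), "std:", rewards.std(),
      "min:", rewards.min(), "max:", rewards.max())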