示例#1
0
    print("Average time steps per episode: {total_epochs / episodes}")


# Load the LunarLander-v2 environment.
env_name = "LunarLander-v2"
env = gym.make(env_name)

# Everything that uses the environment runs inside try/finally so that the
# environment is closed even when loading the checkpoint or playing the
# episode raises (e.g. the checkpoint file below is missing).
try:
    # Secondary agent; its replay memory can be handed to the DQN agent through
    # the disabled `agent.memory = ...` line below. Presumably it acts randomly
    # (judging by the class name) -- confirm against RandomDQNAgent.
    random_agent = RandomDQNAgent(env_name, env, 1000, is_state_box=True, memory_capacity=100000)
    # random_agent.train()

    # The DQN agent is constructed here but not trained: training is disabled
    # and the results of an earlier run are loaded from disk instead.
    agent = DQNAgent(env_name, env, 5000, learning_rate=0.00025, start_epsilon=1.0, discount_factor=0.99,
                     decay_rate=0.0001, make_checkpoint=True, is_state_box=True, batch_size=64,
                     memory_capacity=100000)
    # agent.memory = random_agent.memory
    # agent.train()

    # NOTE(review): absolute, machine-specific checkpoint path -- the script
    # only runs where this file exists.
    weights, rewards, episode_len = agent.load("/home/dsalwala/NUIG/Thesis/rl-algos/data/LunarLander-v2_100.npy")
    stats = plotting.EpisodeStats(
        episode_lengths=episode_len,
        episode_rewards=rewards)

    # Alternative when the agent was trained in this session instead of loaded:
    # nn, stats = agent.nn.get_weights(), agent.stats

    # Rebuild a network, install the loaded weights and play with it.
    # The 8 / 4 arguments presumably are the observation and action counts of
    # LunarLander-v2, and 0.00025 repeats the agent's learning rate -- confirm
    # against the ANN constructor.
    nn = ANN(8, 4, 0.00025)
    nn.set_weights(weights)
    play_episode(env, nn, 1)
finally:
    env.close()

# Plot the statistics that were loaded together with the weights.
plotting.plot_episode_stats(stats)
示例#2
0
# `env` and `env_name` are expected to be defined earlier in this script.
# Everything that uses the environment runs inside try/finally so that the
# environment is closed even when loading the checkpoint or playing the
# episode raises (e.g. the checkpoint file below is missing).
try:
    # The DQN agent is constructed here but not trained: training is disabled
    # and the results of an earlier run are loaded from disk instead.
    agent = DQNAgent(env_name,
                     env,
                     5000,
                     learning_rate=0.00025,
                     start_epsilon=1.0,
                     discount_factor=0.99,
                     decay_rate=0.0001,
                     make_checkpoint=True,
                     is_state_box=True,
                     batch_size=64,
                     memory_capacity=100000)
    # agent.memory = random_agent.memory
    # agent.train()

    # NOTE(review): absolute, machine-specific checkpoint path -- the script
    # only runs where this file exists.
    weights, rewards, episode_len = agent.load(
        "/home/dsalwala/NUIG/Thesis/rl-algos/data/CartPole-v0_3000.npy")
    stats = plotting.EpisodeStats(episode_lengths=episode_len,
                                  episode_rewards=rewards)

    # Alternative when the agent was trained in this session instead of loaded:
    # nn, stats = agent.nn, agent.stats

    # Rebuild a network, install the loaded weights and play with it.
    # The 4 / 2 arguments presumably are the observation and action counts of
    # CartPole-v0, and 0.00025 repeats the agent's learning rate -- confirm
    # against the ANN constructor.
    nn = ANN(4, 2, 0.00025)
    nn.set_weights(weights)
    play_episode(env, nn, 1)
finally:
    env.close()

# Plot the statistics that were loaded together with the weights.
plotting.plot_episode_stats(stats)