Example #1
import gym

# QAgent and normalize() are defined elsewhere in the example's own project.
def exercise_agent(gymName, episodes, render=True, convolutional=False):
    max_t = 0
    env = gym.make(gymName)
    agent = QAgent(env.action_space.n, convolutional)
    for i_episode in range(episodes):
        state = normalize(env.reset())
        agent.observe(state, 0, False)  # seed the agent with the initial state (no reward yet)

        total_reward = 0
        for t in range(10000):
            if render:
                env.render()

            action = agent.act()
            state, reward, done, info = env.step(action)
            state = normalize(state)
            total_reward += reward
            agent.observe(state, reward, done)
            if done:
                max_t = max(max_t, t)
                print(f'{t} : {max_t} : {total_reward}')
                break
    env.close()
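The normalize() helper and the QAgent class are not part of this snippet. Given the convolutional flag above, a plausible stand-in for normalize() is a simple rescaling of raw pixel observations into [0, 1]; the sketch below is based on that assumption and is not the original implementation.

import numpy as np

def normalize(state):
    # Illustrative only: scale raw (e.g. 8-bit pixel) observations into [0, 1].
    return np.asarray(state, dtype=np.float32) / 255.0

# Hypothetical call; the environment name and episode count are placeholders.
# exercise_agent('Breakout-v0', episodes=100, render=False, convolutional=True)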
Example #2
from time import sleep

# BananaEnv, QAgent and the checkpoint variables are defined elsewhere in the example's project.
env = BananaEnv()

agent = QAgent(action_space=env.get_action_space_size(),
               state_space=env.get_state_space_size())
if load_bad_network:
    agent.load_checkpoint(bad_opt_networks_ckp)
elif load_from_seeded_64:  # target and delayer weights are not needed for evaluation; they would only be needed to resume training where it left off.
    agent.load_checkpoint(local_checkpoint=seeded_test_64_ckp["local"],
                          target_checkpoint=seeded_test_64_ckp["target"],
                          delayer_checkpoint=seeded_test_64_ckp["delayer"])
else:
    agent.load_checkpoint(top_opt_networks_ckp[sel_network])

env.reset()
done = False
for i in range(5):
    print("Episode {:d}\n score: ".format(i), end=" ")
    done = False
    env.reset()
    score = 0
    while not done:
        exp = agent.act(env, 0)  # epsilon = 0: act greedily
        score = score + exp.reward
        if abs(exp.reward) > 0.01:
            print(str(int(score)), sep=' ', end=' ', flush=True)
        done = exp.done
        sleep(0.02)
    print("\nfinal score:" + str(score) + "\n")
    sleep(1)
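The checkpoint variables referenced above come from the surrounding project. Judging from the calls, they appear to be file paths (with a dictionary of paths for the seeded run); the values below are illustrative placeholders only, not the original files.

# Assumed setup for the names referenced above; all paths are placeholders.
load_bad_network = False
load_from_seeded_64 = False
sel_network = 0
bad_opt_networks_ckp = "checkpoints/bad_opt.pth"
top_opt_networks_ckp = ["checkpoints/top_opt_0.pth", "checkpoints/top_opt_1.pth"]
seeded_test_64_ckp = {
    "local": "checkpoints/seeded_64_local.pth",
    "target": "checkpoints/seeded_64_target.pth",
    "delayer": "checkpoints/seeded_64_delayer.pth",
}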
Example #3
score_list = []
mean_score_list = []
running_score = 0
eps_start = 1.0
eps_decay = pars["eps_decay_sel"]
eps_end = 0.01
eps = eps_start
max_ep_len = 400
train_episodes = 701
for episode in range(train_episodes):
    eps = max(eps * eps_decay, eps_end)  # decay epsilon, but never below eps_end
    env.reset()
    done = False
    curr_score = 0
    for act_i in range(max_ep_len):
        exp = agent.act(env, eps)  # epsilon-greedy step in the environment
        curr_score = curr_score + exp.reward
        if exp.done:
            break
        if act_i % learn_every == 0:
            agent.learn(64)  # sample a minibatch of 64 experiences and learn
    score_list.append(curr_score)
    score_window.append(curr_score)
    if episode % update_every == 0:
        agent.update_target()  # sync the target network with the local network
    if episode % 20 == 0:
        print("episode " + str(episode) + ", mean score: " +
              str(np.mean(score_window)))
    if episode % 100 == 0:
        mean_score_list.append(np.mean(score_window))
print("test completed with scores: " + str(mean_score_list))
Example #4
import gym
from q_agent import QAgent

env = gym.make('FrozenLake-v0')
print(env.action_space)
print(env.observation_space)

agent = QAgent(env.observation_space, env.action_space)
agent.learn(env)  # train the agent on the environment before evaluation
success = 0
for i_episode in range(100):
    observation = env.reset()
    while True:
        # env.render()
        action = agent.act(observation)
        observation, reward, done, info = env.step(action)
        if done:
            # print("Episode finished after {} timesteps".format(t + 1))
            if reward == 1.0:  # FrozenLake only returns a reward of 1.0 when the goal is reached
                success += 1
            break
print("success rate is {}".format(success))