示例#1
0
def main():
    """Run a DeepQAgent on PongNoFrameskip-v4 for a fixed number of games.

    With ``load_checkpoint`` left at False, every transition is stored and
    followed by a call to ``agent.learn()``, and the models are saved each
    time the mean score of the last 100 episodes exceeds the best mean seen
    so far. With it set to True, saved models are loaded up front and the
    store/learn/save steps are skipped. After the last game the per-episode
    step counts, scores and epsilon values are handed to
    ``plot_learning_curve`` together with the figure path.
    """
    print("Start Atari games")

    # Environment and the dimensions the agent is built from.
    environment_name = "PongNoFrameskip-v4"
    env = make_env(environment_name)
    input_dims = env.observation_space.shape
    n_actions = env.action_space.n

    # Run configuration.
    load_checkpoint = False
    training = not load_checkpoint
    n_games = 500
    chkpt_dir = "models/"
    algo = "DeepQAgent"

    # Agent hyperparameters; DeepQAgent receives them positionally below.
    lr = 0.0001
    gamma = 0.99
    epsilon = 1
    eps_min = 0.01
    eps_dec = 5e-7
    replace = 1000
    mem_size = 50000
    batch_size = 32

    agent = DeepQAgent(
        lr, n_actions, input_dims, chkpt_dir, epsilon, gamma, mem_size,
        batch_size, eps_min, eps_dec, replace, algo, environment_name,
    )
    if load_checkpoint:
        agent.load_models()

    # The plot file name encodes algorithm, environment, learning rate and
    # the number of games.
    run_name = "".join([
        agent.algo, "_", agent.env_name, "_lr", str(agent.lr),
        "_", str(n_games), "_games",
    ])
    figure_file = f"plots/{run_name}.png"

    best_score = -np.inf
    n_steps = 0  # cumulative environment steps across all episodes
    scores, eps_history, steps_array = [], [], []

    for episode in range(1, n_games + 1):
        observation = env.reset()
        score = 0
        done = False
        while not done:
            action = agent.get_action(observation)
            next_observation, reward, done, _info = env.step(action)
            score += reward
            if training:
                agent.store_transition(observation, action, reward,
                                       next_observation, int(done))
                agent.learn()
            observation = next_observation
            n_steps += 1

        scores.append(score)
        steps_array.append(n_steps)
        avg_score = np.mean(scores[-100:])

        # best_score is reported before it is updated for this episode.
        summary = "average score %.1f best score %.1f epsilon %.2f" % (
            avg_score, best_score, agent.epsilon)
        print("episode ", episode, "score: ", score, summary,
              " steps ", n_steps)

        if avg_score > best_score:
            if training:
                agent.save_models()
            best_score = avg_score
        eps_history.append(agent.epsilon)

    plot_learning_curve(steps_array, scores, eps_history, figure_file)
    print("End Atari games")
示例#2
0
# Training loop: play n_episodes episodes of `env` with agent `A`, calling
# A.learn on every transition and recording per-episode statistics.
# `env`, `A` and `np` are expected to be defined before this point.
n_episodes = 10000
win_pct_list = []  # mean of the last (up to) 100 episode scores, sampled every 100 episodes
scores = []        # total reward of each episode
eps_history = []   # value of A.eps at the end of each episode

for i in range(n_episodes):
    score = 0
    s = env.reset()
    done = False

    while not done:
        a = A.pick_action(s)
        s_, r, done, info = env.step(a)
        A.learn(s, a, r, s_)
        score += r
        s = s_

    scores.append(score)
    eps_history.append(A.eps)
    if i % 100 == 0:
        win_pct = np.mean(scores[-100:])
        win_pct_list.append(win_pct)
        if i % 1000 == 0:
            # Two-decimal precision for epsilon; a '%2.f' spec would round it
            # to a whole number and hide its value.
            print('episode', i, 'win pct %.2f' % win_pct, 'eps %.2f' % A.eps)

#plt.plot(win_pct_list)
#plt.show()

# 1-based episode numbers, one per episode played.
x = list(range(1, n_episodes + 1))