Example #1
    def show(self, name=None, sharex=False, sharey=False, running_average=False):
        """Plot a single named entry, or every stored entry when name is None."""
        if name is None:
            if len(self) > 1:
                fig, axs = plt.subplots(len(self), sharex=sharex, sharey=sharey)
                # Plot each entry on its own subplot
                for i, (name, entry) in enumerate(self.entries.items()):
                    axs[i].plot(entry.x, entry.y)
                    if running_average:
                        axs[i].plot(entry.x, utils.running_average(entry.y))
                    axs[i].set_title(entry.name)
            else:
                for name, entry in self.entries.items():
                    plt.plot(entry.x, entry.y)
                    if running_average:
                        plt.plot(entry.x, utils.running_average(entry.y))
                    plt.title(entry.name)

            plt.show()
        else:
            entry = self.entries[name]
            data = entry.y
            plt.plot(data)
            if running_average:
                plt.plot(utils.running_average(data))
            plt.title(entry.name)
            plt.show()
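
None of these examples ships the `running_average` helper itself. Example #3 calls it as `running_average(steps_episodes, interval=50)`, and Example #1 plots its output against the same x values as the raw series, so it presumably returns a same-length windowed mean. A minimal sketch under those assumptions (the window logic and the default below are guesses, not the original implementation):

import numpy as np

def running_average(values, interval=50):
    # Same-length moving average: each point is the mean of the last `interval`
    # samples (or of all samples seen so far, near the start of the series).
    values = np.asarray(values, dtype=float)
    out = np.empty_like(values)
    for i in range(len(values)):
        lo = max(0, i - interval + 1)
        out[i] = values[lo:i + 1].mean()
    return out

With that shape contract, the smoothed overlays in Examples #1 and #5 line up point-for-point with the raw curves.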
Example #2
def plot_rewards(df):
    """Plot per-episode rewards, their running average, and a smoothed step count."""
    rewards = episode_rewards(df)
    steps = df.groupby(['episode']).agg({'reward': 'count'})

    plt.plot(rewards, label='avg reward {:.02f}'.format(average_reward(df)))
    plt.plot(running_average(rewards), label='running avg')
    plt.plot(running_average(steps), label='steps')
    plt.xlabel('episode')
    plt.ylabel('reward')
    plt.title('rewards')
    plt.legend()
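
Example #2 relies on two helpers that are not shown. From the groupby call, `df` appears to hold one row per environment step with `episode` and `reward` columns; a plausible sketch of the missing helpers, with the names taken from the call sites and the bodies guessed:

def episode_rewards(df):
    # Total reward collected in each episode (hypothetical; inferred from usage).
    return df.groupby('episode')['reward'].sum()

def average_reward(df):
    # Scalar mean of the per-episode totals, used only for the legend label.
    return episode_rewards(df).mean()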
Example #3
def update_q():
    """Run tabular Q-learning for MAX_EPISODES, then plot and save the smoothed per-episode step counts."""
    steps_episodes = []
    for episode in range(MAX_EPISODES):
        steps = 0
        s = env.reset()
        while True:
            # env.render()
            a = RL.choose_action(str(s))  # states are keyed by their string representation
            s_, r, done, info = env.step(a)
            steps += 1
            RL.learn(str(s), a, r, str(s_))
            s = s_

            if done:
                print('{0} -- {1}'.format(episode, steps))
                steps_episodes.append(steps)
                # env.render(1)
                break

    # end of game
    print(RL.q_table)
    # plot_steps(steps_episodes, save_path='./logs/q_learning/q_learning.png')
    average_steps = running_average(steps_episodes, interval=50)
    plot_steps(average_steps, save_path='./logs/q_learning/q_learning.png')
    np.save('./logs/q_learning/q_learning.npy', np.asarray(average_steps))
    env.destroy()
    print('game over')
Example #4
def update_dyna_q(n):
    """Dyna-Q: tabular Q-learning plus n model-based planning updates per real environment step."""
    steps_episodes = []
    for episode in range(MAX_EPISODES):
        steps = 0
        s = env.reset()
        while True:
            # env.render()
            a = RL.choose_action(str(s))
            s_, r, done, info = env.step(a)
            steps += 1
            RL.learn(str(s), a, r, str(s_))
            env_model.store_transition(str(s), a, r, s_)
            s = s_

            for i in range(n):  # perform n extra planning updates using the learned env_model
                ms, ma = env_model.sample_s_a()  # ms in here is a str
                mr, ms_ = env_model.get_r_s_(ms, ma)
                RL.learn(ms, ma, mr, str(ms_))

            if done:
                print('{0} -- {1}'.format(episode, steps))
                steps_episodes.append(steps)
                # env.render(1)
                break

    # end of game
    print(RL.q_table)
    # plot_steps(steps_episodes, save_path='./logs/dyna_q/dyna_q.png')
    average_steps = running_average(steps_episodes, interval=50)
    plot_steps(average_steps,
               save_path='./logs/dyna_q/dyna_q_{}.png'.format(n))
    np.save('./logs/dyna_q/dyna_q_{}.npy'.format(n), np.asarray(average_steps))
    env.destroy()
    print('game over')
Example #5
            mem.push((state, action, reward, next_state))

            state = next_state

    if mem.is_full:
        experience_replay()

    epsilon_history.append(epsilon)
    if epsilon > min_epsilon:
        epsilon *= 0.99  # decay the exploration rate until it reaches min_epsilon

    # print(f'Episode {episode}: {sum(rewards)}')
    sum_rewards.append(sum(rewards))

    if episode % 10 == 9:
        fig, (ax0, ax1) = plt.subplots(2)

        ax0.set_title('Total reward for an episode')
        ax0.plot(sum_rewards)
        ax0.plot(running_average(sum_rewards))

        ax1.set_title('Epsilon')
        ax1.plot(epsilon_history)
        plt.show()

    if episode % 100 == 99:
        print(
            f'Ep {episode + 1} | Mean total reward across last 100 episodes: {np.mean(sum_rewards[-100:])}'
        )
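
Example #5 is clipped from a larger DQN-style training loop, so the replay buffer `mem` is only visible through its interface: `push(transition)` and an `is_full` attribute. A minimal buffer matching that interface (the capacity, storage, and `sample` method are assumptions, not the original class):

import random
from collections import deque

class ReplayMemory:
    def __init__(self, capacity=10000):
        self.capacity = capacity
        self.buffer = deque(maxlen=capacity)

    def push(self, transition):
        # transition is a (state, action, reward, next_state) tuple, as pushed above.
        self.buffer.append(transition)

    @property
    def is_full(self):
        # Used as a plain attribute in the snippet, hence a property here.
        return len(self.buffer) == self.capacity

    def sample(self, batch_size):
        # Uniform random minibatch, as experience_replay() would presumably need.
        return random.sample(list(self.buffer), batch_size)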
Example #6
        # print(winner)

        if winner == 'BD':
            points_ac.append(points)
        else:
            points_ac.append(-points)
            points = -points

        for p in game.players:
            if isinstance(p, MyPlayer):
                loss += learn(p.probabilities, p.actions, points)

    rollout_points.append(sum(points_ac))

    loss /= rollouts  # average the accumulated loss over all rollouts before one optimizer step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if e % print_epochs == 0:
        print(
            f'Epoch {e}: Mean AC total {rollouts} rollouts points in last {print_epochs} episodes: {np.mean(rollout_points[-print_epochs:])}'
        )
        # plt.plot(points_ac)
        plt.plot(rollout_points)
        plt.plot(running_average(rollout_points))
        # plt.plot(points_bd)
        # plt.plot(running_average(points_bd))
        plt.show()
        saver.save()