Example #1
    ax1.set_title("DQN")
    ax2.set_title("prio_DQN")
    ax3.set_title("prio_duel_DQN")

    plt.tight_layout()
    plt.savefig('result.jpg')
    # plt.show()


if __name__ == "__main__":
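    # Build a plain DQN and a prioritized-replay DQN with otherwise identical
    # hyperparameters so their learning curves can be compared.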
    DQN = DQNPrioritizedReplay(n_actions=ACTION_SPACE,
                               n_features=N_FEATURES,
                               learning_rate=0.001,
                               e_greedy=0.9,
                               replace_target_iter=REPLACE_TARGET_ITER,
                               memory_size=MEMORY_SIZE,
                               batch_size=BATCH_SIZE,
                               e_greedy_increment=0.00005,
                               prioritized=False,
                               dueling=False)

    prio_DQN = DQNPrioritizedReplay(n_actions=ACTION_SPACE,
                                    n_features=N_FEATURES,
                                    learning_rate=0.001,
                                    e_greedy=0.9,
                                    replace_target_iter=REPLACE_TARGET_ITER,
                                    memory_size=MEMORY_SIZE,
                                    batch_size=BATCH_SIZE,
                                    e_greedy_increment=0.00005,
                                    prioritized=True,
                                    dueling=False)
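    # Sketch only: the third agent implied by the "prio_duel_DQN" subplot title
    # above is not shown in this excerpt; it would presumably combine
    # prioritized replay with a dueling network, e.g.:
    prio_duel_DQN = DQNPrioritizedReplay(n_actions=ACTION_SPACE,
                                         n_features=N_FEATURES,
                                         learning_rate=0.001,
                                         e_greedy=0.9,
                                         replace_target_iter=REPLACE_TARGET_ITER,
                                         memory_size=MEMORY_SIZE,
                                         batch_size=BATCH_SIZE,
                                         e_greedy_increment=0.00005,
                                         prioritized=True,
                                         dueling=True)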
Example #2
import gym
from RL_brain import DQNPrioritizedReplay
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np

env = gym.make('MountainCar-v0')
env = env.unwrapped
env.seed(21)
MEMORY_SIZE = 10000

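# One TensorFlow session is shared by both agents; each builds its network
# inside its own variable scope below so the graphs do not collide.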
sess = tf.Session()
with tf.variable_scope('natural_DQN'):
    RL_natural = DQNPrioritizedReplay(
        n_actions=3,
        n_features=2,
        memory_size=MEMORY_SIZE,
        e_greedy_increment=0.00005,
        sess=sess,
        prioritized=False,
    )

with tf.variable_scope('DQN_with_prioritized_replay'):
    RL_prio = DQNPrioritizedReplay(
        n_actions=3,
        n_features=2,
        memory_size=MEMORY_SIZE,
        e_greedy_increment=0.00005,
        sess=sess,
        prioritized=True,
        output_graph=True,
    )
sess.run(tf.global_variables_initializer())
# Register the custom CartPole evaluation environment (the opening of this
# register() call is cut off in the excerpt; the id is taken from the
# gym.make call below).
gym.envs.registration.register(
    id='CartPole_evaluate-v0',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    tags={'wrapper_config.TimeLimit.max_episode_steps': 10000.0},
    reward_threshold=10000.0,
    kwargs={'change_len': 1.5},
)
env_e = gym.make('CartPole_evaluate-v0')

MEMORY_SIZE = 10000
gpu_options = tf.GPUOptions(allow_growth=True)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

with tf.variable_scope('DQN'):
    RL_natural = DQNPrioritizedReplay(
        n_actions=env.action_space.n,
        n_features=env.observation_space.shape[0],
        memory_size=MEMORY_SIZE,
        e_greedy_increment=0.0001,
        sess=sess,
        prioritized=False,
    )

sess.run(tf.global_variables_initializer())


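# Train a single agent; the shared TF variables are re-initialized here so each
# run starts from scratch (steps_limit presumably caps the environment steps).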
def train(RL, steps_limit):
    # env.render()
    steps_num = 0
    solved = False
    sumreward = 1
    account = 0
    sess.run(tf.global_variables_initializer())
    RL.initiate_common_par()
Example #4
        # Either the task has been completed, or the agent is stuck in a local optimum and has failed
        if done:
            env.close()
            print("success!")
            break
        if try_action_count > 1000:
            print("failed!")
            break


if __name__ == "__main__":
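    # One agent uses prioritized replay, the other a dueling network; all other
    # hyperparameters match so the two training runs can be compared.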
    RL_prio = DQNPrioritizedReplay(n_actions=3,
                                   n_features=2,
                                   learning_rate=0.01,
                                   e_greedy=0.9,
                                   replace_target_iter=100,
                                   memory_size=MEMORY_SIZE,
                                   e_greedy_increment=0.00005,
                                   prioritized=True,
                                   dueling=False)
    cost_prio, his_prio = train(RL_prio)

    RL_dueling = DQNPrioritizedReplay(n_actions=3,
                                      n_features=2,
                                      learning_rate=0.01,
                                      e_greedy=0.9,
                                      replace_target_iter=100,
                                      memory_size=MEMORY_SIZE,
                                      e_greedy_increment=0.00005,
                                      prioritized=False,
                                      dueling=True)
Example #5
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)
            RL.store(observation, action, reward, observation_)
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    print('game over')
    env.destroy()

if __name__ == '__main__':
    env = Maze()
    RL = DQNPrioritizedReplay(env.n_actions, env.n_features,
                              learning_rate=0.01,
                              reward_decay=0.9,
                              e_greedy=0.9,
                              replace_target_iter=200,
                              memory_size=2000,
                              # output_graph=True
                              )

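    # The Maze environment is Tkinter-based: run_maze is scheduled 100 ms after
    # start-up, the GUI main loop takes over, and the loss curve is plotted
    # once the main loop returns.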
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    print('game over')
    env.destroy()


if __name__ == '__main__':
    env = Maze()
    RL = DQNPrioritizedReplay(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )

    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
import gym
from RL_brain import DQNPrioritizedReplay
import matplotlib.pyplot as plt
import numpy as np

env = gym.make("MountainCar-v0")
env = env.unwrapped
env.seed(21)
MEMORY_SIZE = 10000

RL_natural = DQNPrioritizedReplay(n_actions=3,
                                  n_features=2,
                                  memory_size=MEMORY_SIZE,
                                  e_greedy_increment=0.00005,
                                  prioritized=False)
RL_prio = DQNPrioritizedReplay(n_actions=3,
                               n_features=2,
                               memory_size=MEMORY_SIZE,
                               e_greedy_increment=0.00005,
                               prioritized=True)


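# Train one agent for 20 episodes, tracking the cumulative step count per
# episode so the natural and prioritized agents can be compared afterwards.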
def train(RL):
    total_steps = 0
    steps = []
    episodes = []
    for i_episode in range(20):
        observation = env.reset()
        while True:
            # print("episode: {} | total_steps: {}".format(i_episode, total_steps))
            # if total_steps - MEMORY_SIZE > 8000: env.render()
Example #8
    model.load_model(model_path)

    env = Tetris(width=TETRIS_WIDTH,
                 height=TETRIS_HEIGHT,
                 block_size=TETRIS_BLOCK_SIZE)
    env.reset()

    if torch.cuda.is_available():
        model.cuda()

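    # Play one game greedily: enumerate every placement of the current piece,
    # let the model pick one without exploration, and stop when the game ends.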
    while True:
        next_steps = env.get_next_states()
        next_actions, next_states = zip(*next_steps.items())
        action, _ = model.choose_action(next_actions,
                                        next_states,
                                        is_random=False)
        _, done = env.step(action, render=True)

        if done:
            print("Cleared: {}".format(env.cleared_lines))
            break


if __name__ == "__main__":
    prio_duel_DQN = DQNPrioritizedReplay(n_actions=ACTION_SPACE,
                                         n_features=N_FEATURES,
                                         test=True)
    model_path = "model/dqn_tetris.pkl"
    test(prio_duel_DQN, model_path)
Example #9
reload(lh)
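# BasicOpponent is the environment's built-in scripted opponent; presumably the
# DQN agent plays against it during training.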
playerComputer = lh.BasicOpponent()

env = lh.LaserHockeyEnv(mode=0)
MEMORY_SIZE = 100000
Ep_max = 10000
Step_max = 500

sess = tf.Session()

with tf.variable_scope('DQN_with_prioritized_replay'):
    RL_prio = DQNPrioritizedReplay(
        n_actions=8,
        n_features=16,
        memory_size=MEMORY_SIZE,
        e_greedy_increment=None,
        sess=sess,
        prioritized=True,
        output_graph=True,
    )
sess.run(tf.global_variables_initializer())


def train(RL):
    global GLOBAL_RUNNING_R
    total_steps = 0
    steps = []
    episodes = []
    cost_his = []
    for i_episode in range(Ep_max):
        ep_reward = 0