Example #1
from RL_brain import DoubleDQN
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import matplotlib
from Res_plot import Res_plot
from RIS_UAV import RIS_UAV  # assumed module name; the excerpt instantiates RIS_UAV without an import

env = RIS_UAV()
res = Res_plot(env)
MEMORY_SIZE = 3200
Episodes = env.eps

sess = tf.Session()
with tf.variable_scope('Double_DQN_UAV'):
    double_DQN_UAV = DoubleDQN(
        n_actions=env.n_actions, n_features=env.n_features, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001, double_q=True,  ris=False, passive_shift=False, sess=sess, output_graph=True)

with tf.variable_scope('Double_DQN_FIX_RIS'):
    double_DQN_RIS_Fix = DoubleDQN(
        n_actions=env.n_actions, n_features=env.n_features, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001, double_q=True, ris=True, passive_shift=False, sess=sess, output_graph=True)

with tf.variable_scope('Double_DQN_RIS'):
    double_DQN_RIS = DoubleDQN(
        n_actions=env.n_actions, n_features=env.n_features, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001, double_q=True, ris=True, passive_shift=True, sess=sess, output_graph=True)

sess.run(tf.global_variables_initializer())

# record the results
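The excerpt stops where result recording would begin. Below is a minimal sketch of how the three agents are typically driven and compared, assuming RIS_UAV exposes the same reset()/step() interface as the Maze and gym environments in the later examples (step returning the next observation, the reward, and a done flag) and reusing the choose_action/store_transition/learn pattern shown there; the train helper and the warm-up threshold are illustrative, not part of the original script.

# Sketch only: RIS_UAV's exact reset()/step() signature is an assumption.
def train(agent, n_episodes):
    episode_rewards = []
    for episode in range(n_episodes):
        observation = env.reset()
        episode_reward, step = 0, 0
        while True:
            action = agent.choose_action(observation)
            observation_, reward, done = env.step(action)
            agent.store_transition(observation, action, reward, observation_)
            episode_reward += reward
            if step > 200 and step % 5 == 0:  # learn only after the replay memory has warmed up
                agent.learn()
            observation = observation_
            if done:
                break
            step += 1
        episode_rewards.append(episode_reward)
    return episode_rewards

rewards_uav = train(double_DQN_UAV, Episodes)
rewards_ris_fix = train(double_DQN_RIS_Fix, Episodes)
rewards_ris = train(double_DQN_RIS, Episodes)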
Example #2
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)
            RL.store_transition(observation, action, reward, observation_)
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    print('game over')
    env.destroy()

if __name__ == '__main__':
    env = Maze()
    RL = DoubleDQN(env.n_actions, env.n_features,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000,
                      # output_graph=True
                      )

    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
Example #3
import time
import xlwt
import numpy as np
import tensorflow as tf
from RL_brain import DoubleDQN
from MyEngine import MyEngine  # assumed module name; the excerpt uses MyEngine without an import

file = xlwt.Workbook(encoding='utf-8', style_compression=0)  # Excel workbook for logging results
sheet = file.add_sheet('aa')

t0 = time.perf_counter()  # time.clock() was removed in Python 3.8

MyEngine = MyEngine()  # note: rebinds the class name to the instance
MyEngine.QD_start()

MEMORY_SIZE = 500  # original value was 3000
ACTION_SPACE = 3

sess = tf.Session()


RL = DoubleDQN(
        n_actions=ACTION_SPACE, n_features=2, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001, double_q=True, sess=sess, output_graph=True)

sess.run(tf.global_variables_initializer())

RL.load_net()  # restore previously saved network parameters

nk = 0
step = 0

cont = 1  # counter used when saving data
array = np.array([0], float)
observation_S = array
Example #4
import gym
from RL_brain import DoubleDQN
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

env = gym.make('Pendulum-v0')
env = env.unwrapped
env.seed(1)
MEMORY_SIZE = 3000
ACTION_SPACE = 11

sess = tf.Session()
with tf.variable_scope('Natural_DQN'):
    natural_DQN = DoubleDQN(n_actions=ACTION_SPACE,
                            n_features=3,
                            memory_size=MEMORY_SIZE,
                            e_greedy_increment=0.001,
                            double_q=False,
                            sess=sess)

with tf.variable_scope('Double_DQN'):
    double_DQN = DoubleDQN(n_actions=ACTION_SPACE,
                           n_features=3,
                           memory_size=MEMORY_SIZE,
                           e_greedy_increment=0.001,
                           double_q=True,
                           sess=sess,
                           output_graph=True)

sess.run(tf.global_variables_initializer())
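The two agents above differ only in the double_q flag. As a standalone numpy sketch of what that flag controls (the textbook update, not RL_brain's actual code): natural DQN bootstraps from the maximum of the target network's estimates, while double DQN lets the eval network pick the action and the target network score it, which damps overestimation.

import numpy as np

def td_target(reward, q_next_target, q_next_eval, gamma=0.9, double_q=True):
    # q_next_target / q_next_eval: action-value rows for the next state,
    # produced by the target network and the eval network respectively.
    if double_q:
        best_action = np.argmax(q_next_eval)                # eval net selects the action ...
        return reward + gamma * q_next_target[best_action]  # ... target net evaluates it
    return reward + gamma * np.max(q_next_target)           # natural DQN: target net does both

# An action the target net overestimates is ignored once the eval net disagrees:
print(td_target(1.0, np.array([0.2, 5.0, 0.1]), np.array([0.3, 0.1, 0.4]), double_q=False))  # 5.5
print(td_target(1.0, np.array([0.2, 5.0, 0.1]), np.array([0.3, 0.1, 0.4]), double_q=True))   # 1.09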

Example #5
import gym
import numpy as np
import matplotlib.pyplot as plt
from RL_brain import DoubleDQN

env = gym.make('MountainCar-v0')
env = env.unwrapped
print(env.action_space,
      env.observation_space,
      env.observation_space.high,
      env.observation_space.low,
      sep='\n')

RL = DoubleDQN(n_actions=env.action_space.n,
               n_features=env.observation_space.shape[0],
               learning_rate=0.01,
               e_greedy=0.9,
               replace_target_iter=100,
               memory_size=2000,
               e_greedy_increment=0.001)

MEMORY_SIZE = 2000  # replay-memory warm-up threshold (matches memory_size=2000 above)


def train(RL):
    total_steps = 0

    for i_episode in range(100):
        observation = env.reset()
        ep_r = 0

        while True:
            env.render()

            action = RL.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            ep_r += reward
            RL.store_transition(observation, action, reward, observation_)

            if total_steps > MEMORY_SIZE:  # start learning once the replay memory has filled
                RL.learn()

            if done or total_steps - MEMORY_SIZE > 20000:  # episode end, or fixed training budget reached
                break

            observation = observation_
            total_steps += 1
    return RL.q


if __name__ == "__main__":
    DDQN = DoubleDQN(n_actions=env.action_space.n,
                     n_features=env.observation_space.shape[0],
                     learning_rate=0.01,
                     e_greedy=0.9,
                     replace_target_iter=100,
                     memory_size=2000,
                     e_greedy_increment=0.001,
                     double_q=True)
    q_double = train(DDQN)
    DQN = DoubleDQN(n_actions=env.action_space.n,
                    n_features=env.observation_space.shape[0],
                    learning_rate=0.01,
                    e_greedy=0.9,
                    replace_target_iter=100,
                    memory_size=2000,
                    e_greedy_increment=0.001,
                    double_q=False)
    q_nature = train(DQN)
    plt.plot(np.array(q_nature), c='r', label='natural')
    plt.plot(np.array(q_double), c='b', label='double')
    plt.legend(loc='best')
    plt.ylabel('Q value')
    plt.xlabel('training steps')
    plt.show()
Example #7
import gym
from RL_brain import DoubleDQN
import numpy as np
import matplotlib.pyplot as plt

env = gym.make("Pendulum-v0")
env = env.unwrapped
env.seed(1)
MEMORY_SIZE = 3000
ACTION_SPACE = 11

natural_DQN = DoubleDQN(n_actions=ACTION_SPACE,
                        n_features=3,
                        memory_size=MEMORY_SIZE,
                        e_greedy_increment=0.001,
                        double_q=False)
double_DQN = DoubleDQN(n_actions=ACTION_SPACE,
                       n_features=3,
                       memory_size=MEMORY_SIZE,
                       e_greedy_increment=0.001,
                       double_q=True)


def train(RL):
    total_steps = 0
    observation = env.reset()
    while True:
        if total_steps - MEMORY_SIZE > 8000:
            env.render()  # show the game when trained for some time
        action = RL.choose_action(observation)
        f_action = (action - (ACTION_SPACE - 1) / 2) / ((ACTION_SPACE - 1) / 4)  # map the discrete action index to a continuous torque in [-2, 2]
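For ACTION_SPACE = 11 the expression above reduces to (action - 5) / 2.5, spreading the discrete indices evenly over Pendulum's torque range in steps of 0.4; in the usual Pendulum-v0 setup the mapped value is then passed to env.step as a one-element array, since the environment expects a continuous action. A self-contained worked example of the mapping:

action = 8                                           # example index chosen by the agent
f_action = (action - (11 - 1) / 2) / ((11 - 1) / 4)  # = 1.2; index 0 -> -2.0, 5 -> 0.0, 10 -> +2.0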
Example #8
env = env.unwrapped
env.seed(21)
MEMORY_SIZE = 10000
ACTION_SPACE = 3
FEATURE_SIZE = 2
print(env.action_space,
      env.observation_space,
      env.observation_space.high,
      env.observation_space.low,
      sep='\n')

sess = tf.Session()
with tf.variable_scope('Natural_DQN'):
    natural_DQN = DoubleDQN(n_actions=ACTION_SPACE,
                            n_features=FEATURE_SIZE,
                            memory_size=MEMORY_SIZE,
                            e_greedy_increment=0.001,
                            double_q=False,
                            sess=sess)

with tf.variable_scope('Prioritized_DQN'):
    prioritized_DQN = DQNPrioritizedReplay(n_actions=ACTION_SPACE,
                                           n_features=FEATURE_SIZE,
                                           memory_size=MEMORY_SIZE,
                                           e_greedy_increment=0.001,
                                           prioritized=True,
                                           sess=sess,
                                           output_graph=True)

sess.run(tf.global_variables_initializer())
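This example pairs a natural DQN against a prioritized-replay agent. A tiny standalone sketch of the idea the prioritized flag refers to (proportional prioritization as in Schaul et al., not DQNPrioritizedReplay's actual implementation): transitions are sampled with probability proportional to their TD error, and importance-sampling weights correct the bias that introduces.

import numpy as np

def sample_prioritized(td_errors, batch_size, alpha=0.6, beta=0.4, eps=0.01):
    priorities = (np.abs(td_errors) + eps) ** alpha     # larger TD error -> higher priority
    probs = priorities / priorities.sum()
    idx = np.random.choice(len(td_errors), batch_size, p=probs)
    weights = (len(td_errors) * probs[idx]) ** (-beta)  # importance-sampling correction
    return idx, weights / weights.max()                 # normalised weights for the loss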

Example #9
                episodes.append(episode)
                break
            step += 1   # total step counter

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = DoubleDQN(env.n_actions,
                      env.n_features,   # number of features describing an observation/state, e.g. length, width, height
                      learning_rate=0.01,
                      reward_decay=0.9,
                      dueling=True,
                      e_greedy=0.9,
                      replace_target_iter=200,  # copy eval_net parameters to target_net every 200 learning steps
                      memory_size=2000, # replay memory capacity
                      # output_graph=True   # whether to write a TensorBoard log file
                      )
    env.after(100, run_maze)  # kick off the reinforcement-learning training loop
    env.mainloop()

    # plot the training-time (total steps) curve
    his_double = np.vstack((episodes, steps))

    file = open('his_dueling_DDQN.pickle', 'wb')
    pickle.dump(his_double, file)
    file.close()

    plt.plot(his_double[0, :], his_double[1, :] - his_double[1, 0], c='b', label='Dueling DDQN')
Example #10
import gym
from RL_brain import DoubleDQN
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf


env = gym.make('Pendulum-v0')
env = env.unwrapped
env.seed(1)
MEMORY_SIZE = 3000
ACTION_SPACE = 11

sess = tf.Session()
with tf.variable_scope('Natural_DQN'):
    natural_DQN = DoubleDQN(
        n_actions=ACTION_SPACE, n_features=3, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001, double_q=False, sess=sess
    )

with tf.variable_scope('Double_DQN'):
    double_DQN = DoubleDQN(
        n_actions=ACTION_SPACE, n_features=3, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001, double_q=True, sess=sess, output_graph=True)

sess.run(tf.global_variables_initializer())
total_steps = 0
all_rewards = []
for i in range(400):
    s_c = env.reset()
    done = False
    step = 0
    reward = 0