Example #1
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            # if done:
            #     break
            step += 1
            time.sleep(60)


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
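The run_maze function scheduled by env.after(100, run_maze) is only shown in fragments across this collection. The following is a hypothetical reconstruction assembled from those fragments (choose_action, a Maze step() returning three values, store_transition, and learning every 5 steps after a 200-step warm-up); the episode count and the env.render() call are assumptions, not the original author's code.

# Hypothetical sketch of run_maze, reconstructed from the loop fragments above.
def run_maze():
    step = 0
    for episode in range(300):  # episode count is an assumption
        # initial observation
        observation = env.reset()
        while True:
            # refresh the maze window
            env.render()

            # RL chooses an action based on the current observation
            action = RL.choose_action(observation)

            # take the action and observe the next state and reward
            observation_, reward, done = env.step(action)

            RL.store_transition(observation, action, reward, observation_)

            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()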
Example #2
            else:
                action = RL_1.choose_action(observation)
                print('AI_1 action:'+str(action+1))
                # action = int(input('')) - 1
                observation_, reward, done, _ = env.step(action)

            # make the next state_ the state for the next loop iteration
            observation = observation_
            # break out of the loop if the episode is over
            if done:
                print('Reward:'+str(reward))
                env.render()
                input('Press Enter to continue')
                break
            
            time.sleep(0.5)
        
if __name__ == '__main__':
    env = TTTEnv()
    RL_0 = DeepQNetwork(
        env.n_action, env.n_features,
        'player_0', 'player_1',
        e_greedy = 1,
    )
    RL_1 = DeepQNetwork(
        env.n_action, env.n_features,
        'player_1', 'player_0',
        e_greedy = 1,
    )
    play()
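The play() routine called here is not included in the snippet; the fragment at the top of this example only shows the branch where RL_1 moves. Below is a minimal sketch of how such a two-agent loop could look, assuming TTTEnv exposes a gym-style reset() and that the two greedy agents simply alternate turns; the turn counter and the RL_0 branch are assumptions, not the original code.

# Hypothetical sketch of play(): two greedy DQN agents alternate on the board.
def play():
    observation = env.reset()
    turn = 0
    while True:
        env.render()
        if turn % 2 == 0:
            action = RL_0.choose_action(observation)
            print('AI_0 action:' + str(action + 1))
        else:
            action = RL_1.choose_action(observation)
            print('AI_1 action:' + str(action + 1))
        observation_, reward, done, _ = env.step(action)

        # make the next state_ the state for the next loop iteration
        observation = observation_
        # break out of the loop if the episode is over
        if done:
            print('Reward:' + str(reward))
            env.render()
            input('Press Enter to continue')
            break

        turn += 1
        time.sleep(0.5)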
    

Example #3
import gym
from RL_brain import DeepQNetwork

env = gym.make('CartPole-v1')
env = env.unwrapped

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DeepQNetwork(n_actions=env.action_space.n,
                  n_features=env.observation_space.shape[0],
                  learning_rate=0.01, e_greedy=0.9,
                  replace_target_iter=100, memory_size=2000,
                  e_greedy_increment=0.001,)

total_steps = 0


for i_episode in range(100):

    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)
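        # --- hypothetical continuation (a sketch, not the original code) ---
        # The snippet is cut off here.  A typical continuation combines the
        # shaped CartPole reward used in Example #5 below with the replay
        # memory / learn() pattern used throughout this collection; the
        # 1000-step warm-up threshold is an assumption.
        observation_, reward, done, info = env.step(action)

        # the smaller theta and the closer to the center, the better
        x, x_dot, theta, theta_dot = observation_
        r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
        r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
        reward = r1 + r2

        RL.store_transition(observation, action, reward, observation_)

        ep_r += reward
        if total_steps > 1000:
            RL.learn()

        if done:
            print('episode:', i_episode, 'ep_r:', round(ep_r, 2),
                  'epsilon:', round(RL.epsilon, 2))
            break

        observation = observation_
        total_steps += 1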
Example #4
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # make the next state_ the state for the next loop iteration
            observation = observation_

            # break out of the loop if the episode is over
            if done:
                break
            step += 1   # total step count

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,  # replace target_net parameters every 200 steps
                      memory_size=2000, # memory capacity
                      # output_graph=True   # whether to write a TensorBoard log file
                      )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()  # plot the network's loss curve

    
Example #5
import gym
from RL_brain import DeepQNetwork

env = gym.make('CartPole-v0')
env = env.unwrapped
RL = DeepQNetwork(env.observation_space.shape[0],
                  learning_rate=0.01,
                  reward_decay=0.9,
                  e_greedy=0.9)

for episode in range(1000):
    observation = env.reset()
    ep_r = 0
    while True:
        env.render()
        action = RL.choose_action(observation)
        observation_, reward, done, info = env.step(action)

        # the smaller theta and closer to center the better
        x, x_dot, theta, theta_dot = observation_
        r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
        r2 = (env.theta_threshold_radians -
              abs(theta)) / env.theta_threshold_radians - 0.5
        reward = r1 + r2

        ep_r += reward
        RL.learn(observation, action, reward, observation_)
        observation = observation_
        if done:
            print('ep_r: ', ep_r, ' reward: ', reward)
            break
Example #6
    observation = env.reset()
    while True:
        action = RL.choose_action2(observation)
        observation_, reward, done = env.step(action)
        print(observation, observation_)
        env.path(observation, observation_)
        observation = observation_
        if done:
            break
    time.sleep(5)
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,  # 0.7 did not work well
        hidden_layers=[10, 10],  #[10,10]
        replace_target_iter=500,  #500
        memory_size=5000,  #5000
        output_graph=True)
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
    # RL.write_cost()
Example #7
    time_string = utils.get_string_time()
    print(time_string, " the test begins")
    start_time = time.clock()

    # maze game
    env = Maze()

    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.98,  # eventually stops exploring
        replace_target_iter=300,
        memory_size=4800,
        e_greedy_origin=0.5,
        e_greedy_increment=0.0001,  # how fast epsilon grows
        model_load=True,
        model_load_dir="save/2018-3-30-22:17/model.ckpt",
        model_save_dir="save/{time}/model.ckpt".format(time=time_string),
        output_graph=False,
    )
    env.after(100, run_maze)
    env.mainloop()
    if model_save:
        RL.model_saver()
    end_time = time.clock()
    print("spend time: %f s" % (end_time - start_time))
    RL.plot_cost()
    # env.plot_error_change()
Example #8
import gym
from RL_brain import DeepQNetwork

env = gym.make('MountainCar-v0')
env = env.unwrapped

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DeepQNetwork(
    n_actions=3,
    n_states=2,
    learning_rate=0.001,
    epsilon_greedy=0.9,
    replace_target_iter=300,
    memory_size=3000,
    epsilon_greedy_increment=0.0002,
)

total_steps = 0

for i_episode in range(100):

    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)
Example #9
"""The mountain car example"""

import gym
from RL_brain import DeepQNetwork

env = gym.make('MountainCar-v0')
print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DeepQNetwork(n_actions=3,
                  n_features=2,
                  learning_rate=0.0005,
                  e_greedy=0.9,
                  replace_target_iter=300,
                  memory_size=3000,
                  e_greedy_increment=0.001,
                  hidden_layers=[20, 20])

total_steps = 0

for i_episode in range(10):

    observation = env.reset()

    while True:
        env.render()

        action = RL.choose_action(observation)
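        # --- hypothetical continuation (a sketch, not the original code) ---
        # The snippet is cut off here.  A typical continuation uses the
        # position-based shaped reward from the MountainCar example further
        # below together with the replay memory; the 1000-step warm-up
        # threshold and the bookkeeping are assumptions.
        observation_, reward, done, info = env.step(action)

        # the farther the car gets from the valley bottom at -0.5, the better
        position, velocity = observation_
        reward = abs(position - (-0.5))

        RL.store_transition(observation, action, reward, observation_)
        if total_steps > 1000:
            RL.learn()

        if done:
            print('episode', i_episode, 'finished after', total_steps, 'total steps')
            break

        observation = observation_
        total_steps += 1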
Example #10
#from tos_env import Tos
from RL_brain import DeepQNetwork
from tos_env import Tos

import gym
import numpy as np
import argparse
import Maps

# command arguments
parser = argparse.ArgumentParser()
parser.add_argument('-map', type=int, help='Select the initial map you want. :)')
args = parser.parse_args()

if __name__ == '__main__':

    # build the virtual environment
    map_index = args.map
    env = Tos(Maps.maps[map_index])

    # build the neural network
    brain = DeepQNetwork(env)
    #brain.learn()
    brain.run_test('Weights//second_version_weights.h5f')
Example #11
            step += 1

        # add to list
        mean.append(env.asset - 10000)

        if episode % 10 == 0:
            print('Episode %d' % episode)
        print(mean[-1])

    # calculate mean
    print(np.mean(mean), np.var(mean))
    plt.scatter(range(len(mean)), mean)
    plt.show()

    # end of game
    print('game over')


if __name__ == "__main__":
    RL = DeepQNetwork(
        11,  # n_actions
        8,   # n_features
        learning_rate=0.005,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )
    run_stock()
Example #12
            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    # use DQN as the RL method
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.01,    # learning rate set to 0.01
                      reward_decay=0.9,      # expected reward discount
                      e_greedy=0.9,          # probability of picking the action with the largest Q value
                      replace_target_iter=200,   # replace target_net parameters every 200 steps
                      memory_size=2000,     # memory capacity
                      output_graph=True,   # export the network training graph
                      restore_network=False,
                      save_network=False
                      )
    RL.restore_net()
    env.after(100, run_maze)  # after() schedules run_maze on the Tk timer
    env.mainloop()  # mainloop() enters the Tk event (message) loop
    save_path = RL.save_net()
    RL.plot_cost()  # plot the network's loss curve
Example #13
import gym
from RL_brain import DeepQNetwork

env = gym.make('MountainCar-v0')
env = env.unwrapped
RL = DeepQNetwork(n_features=2,
                  learning_rate=0.01,
                  reward_decay=0.9,
                  e_greedy=0.9)

for episode in range(10):
    observation = env.reset()
    ep_r = 0
    while True:
        env.render()
        action = RL.choose_action(observation)
        observation_, reward, done, info = env.step(action)

        # the higher the better
        position, velocity = observation_
        reward = abs(position - (-0.5))  # r in [0, 1]

        ep_r += reward
        RL.learn(observation, action, reward, observation_)
        observation = observation_
        if done:
            print('ep_r: ', ep_r, ' reward: ', reward)
            break
Example #14
import numpy as np

if __name__ == "__main__":
    # maze game
    game = Game()
    game.init()
    # number of observed objects (enemies + player) * 2 directions
    n_feature = (len(game.enemylist) + 1) * 2
    # number of actions: 4 directions
    n_action = 4

    RL = DeepQNetwork(
        n_action,
        n_feature,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )

    while True:
        step = 0
        game.init()
        game.render()
        observation = game.get_infomation()
        for episode in range(int(1e20)):
            # initial observation
            if game.command == 'exit':
                RL.plot_cost()
            game.init()
Example #15
        # print(correct / length)
        ret.append(correct / length)
    return ret


datas = pd.read_csv('datas/runtime_dataset.csv', header=None)
datas = np.array(datas)
length = len(datas)
np.random.shuffle(datas)
datas = pd.DataFrame(datas)
cost_his = []
RL = DeepQNetwork(6,   # n_actions
                  14,  # n_features
                  learning_rate=0.00001,
                  reward_decay=0.99,
                  e_greedy=0.9,
                  replace_target_iter=100,
                  memory_size=500,
                  output_graph=True,
                  cost_his=cost_his)
for i in range(0, 500):
    row = datas.iloc[i, :]
    data = np.array(row, dtype=float).reshape(1, 17)
    env = WorkloadEnv(data)
    # vm_init = data[0][10:13]
    # vm_obj = data[0][14:17]
    # vm_init = np.array(vm_init, dtype=float).reshape(1, 3)
    # vm_obj = np.array(vm_obj, dtype=float).reshape(1, 3)
    # vm_gap = np.absolute(vm_init - vm_obj)
    # if np.sum(vm_gap) < 5:
    #     run_this(env, RL, 5)
Example #16
class Policy:
    def __init__(self):

        # define DQN algorithm
        tensorflow.reset_default_graph()
        self.RL1 = DeepQNetwork(
            n_actions=len(robot1.action_space),
            n_features=len(robot1.observation_space),
            learning_rate=0.0001,
            e_greedy=0.9,
            replace_target_iter=100,
            memory_size=2000,
            e_greedy_increment=0.008,
        )  #0.0008

        self.total_steps = 0
        self.rsrvl = 0.05  # to check
        self.train()

    def train(self):
        vrep.simxFinish(-1)  #clean up the previous stuff
        clientID = vrep.simxStart('127.0.0.1', 19997, True, True, 5000, 5)
        if clientID == -1:
            print("Could not connect to server")
            sys.exit()

        first = True
        for i_episode in range(100):

            vrep.simxStartSimulation(clientID, vrep.simx_opmode_oneshot)

            observation1 = robot1.observation_space
            observation2 = robot2.observation_space
            ep_r = 0
            self.steps = 0
            while True:

                action1 = self.RL1.choose_action(observation1)  # To check
                # print(action1)
                observation1_, done1 = robot1.step(action1)  # To check
                #print(observation1_)
                observation2_ = robot2.observation_space
                done2 = False

                x1, y1, z1, vx1, vy1, vz1, theta1_f, theta2_f, theta3_f = observation1_  # To check
                x2, y2, z2, vx2, vy2, vz2, theta1_b, theta2_b, theta3_b = observation2_

                error, self.r1 = vrep.simxGetObjectHandle(
                    clientID, 'body#1', vrep.simx_opmode_blocking)
                error, self.r2 = vrep.simxGetObjectHandle(
                    clientID, 'body#7', vrep.simx_opmode_blocking)

                error, position_hexa_base1 = vrep.simxGetObjectPosition(
                    clientID, self.r1, -1, vrep.simx_opmode_blocking)
                x1 = position_hexa_base1[0]
                y1 = position_hexa_base1[1]
                z1 = position_hexa_base1[2]

                error, position_hexa_base2 = vrep.simxGetObjectPosition(
                    clientID, self.r2, -1, vrep.simx_opmode_blocking)
                x2 = position_hexa_base2[0]
                y2 = position_hexa_base2[1]
                z2 = position_hexa_base2[2]

                # 3-D distance between the two robot bases
                distance = np.sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) *
                                   (y1 - y2) + (z1 - z2) * (z1 - z2))
                ###########################
                if np.abs(z2 - z1) > 0.2 or distance > 1 or distance < 0.15:
                    done1 = True

                ######################################

                #########reward function############
                #reward1 = self.rsrvl + (vx1 + vx2) - 0.5 * (np.abs(vy1) + np.abs(vy2))
                reward = 100 * (distance < 0.15) - 10 * (
                    distance > 1 or np.abs(z2 - z1) > 0.2) - 0.1 * self.steps
                #################################
                #print("R: ", reward)
                print("distance: ", distance)
                # print("z1:",z1)

                self.RL1.store_transition(observation1, action1, reward,
                                          observation1_)

                if self.total_steps > 200 and self.total_steps % 5 == 0:
                    self.RL1.learn()

                ep_r += reward
                if done1:
                    #print(done1)
                    print('episode: ', i_episode, 'ep_r: ', round(ep_r, 2),
                          ' epsilon: ', round(self.RL1.epsilon, 2))
                    break

                observation1 = observation1_
                observation2 = observation2_
                self.total_steps += 1
                self.steps += 1
                done1 = False

            first = False
            vrep.simxStopSimulation(clientID, vrep.simx_opmode_blocking)
            time.sleep(1)
        self.RL1.plot_cost()
Example #17
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000,
                      output_graph=True
                      )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
Example #18
import tensorflow as tf
import threading
import time
import sys
import socket
FLAG = True
tf.reset_default_graph()

env = PowerSys()
env.reset()
env.show()
RL = DeepQNetwork(
    n_actions=len(env.action_space),
    n_features=len(env.observation),
    learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.0006,
)

loss = [env.state['loss']]


class trainThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        self.total_steps = 0

    def run(self):
        if FLAG:
Example #19
nfeatures = []
for dim in env.observation_space.shape:
    nfeatures.append(dim)
print(nfeatures)

total_steps = 0

action_map = [[-1, 1, 0], [-0.5, 1, 0], [0, 1, 0], [0.5, 1, 0], [1, 1, 0],
              [-1, 1, 0.5], [-0.5, 1, 0.5], [0, 1, 0.5], [0.5, 1, 0.5],
              [1, 1, 0.5], [-1, 0.5, 1], [-0.5, 0.5, 1], [0, 0.5, 1],
              [0.5, 0.5, 1], [1, 0.5, 1]]

RL = DeepQNetwork(
    n_actions=len(action_map),
    features=nfeatures,
    learning_rate=0.05,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.001,
)

for i_episode in range(500):
    observation = env.reset()
    ep_r = 0
    step = 0

    while True:
        if i_episode > 200:
            env.render()

        if step > 50:
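            pass  # body truncated in the source snippet

        # --- hypothetical sketch (not the original code) ---
        # The rest of this loop presumably maps the discrete index chosen by
        # the DQN onto the continuous [steering, gas, brake] triple defined in
        # action_map before stepping the environment; the warm-up threshold
        # and the episode bookkeeping below are assumptions.
        action = RL.choose_action(observation)
        observation_, reward, done, info = env.step(action_map[action])

        RL.store_transition(observation, action, reward, observation_)
        if total_steps > 1000:
            RL.learn()

        ep_r += reward
        observation = observation_
        total_steps += 1
        step += 1
        if done:
            print('episode:', i_episode, 'ep_r:', round(ep_r, 2))
            break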
Example #20
parser.add_argument('--train',
                    dest='train',
                    action='store_true',
                    default=False)
parser.add_argument('--test', dest='test', action='store_true', default=True)
args = parser.parse_args()

# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
grid_x = 4
grid_y = 1

RL = DeepQNetwork(
    n_actions=2**(grid_x * grid_y),
    n_features=5 * (grid_x * grid_y),
    #   learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=10000,
    e_greedy_increment=0.001,
)

window = tk.Tk()
window.title('my window')
window.geometry('1000x1000')
canvas = tk.Canvas(window, bg='white', height=1000, width=1000)

x = []
y = []
for i in range(grid_x):
    x.append(i + 1)
for i in range(grid_y):
Example #21
import gym
import tensorflow as tf
from RL_brain import DeepQNetwork

env = gym.make('sheep-v0')
env = env.unwrapped

print(env.action_space)
print(env.observation_space)

sess = tf.Session()
with tf.variable_scope('RL_DQN'):
    RL_DQN = DeepQNetwork(
        n_actions=env.DISCRETE_Action_Count,
        n_features=env.FEATURE_Count,
        learning_rate=0.01,
        e_greedy=0.55,
        replace_target_iter=100,
        memory_size=30000,
        e_greedy_increment=0.001,
        random=False,
    )
with tf.variable_scope('RL_random'):
    RL_random = DeepQNetwork(
        n_actions=env.DISCRETE_Action_Count,
        n_features=env.FEATURE_Count,
        learning_rate=0.01,
        e_greedy=0.9,
        replace_target_iter=100,
        memory_size=30000,
        e_greedy_increment=0.001,
        random=True,
    )
Example #22
class Policy:
    def __init__(self):
        # define publisher to control start or stop vrep
        self.pub_start_signal = rospy.Publisher("/startSimulation",
                                                Bool,
                                                queue_size=1)
        self.pub_stop_signal = rospy.Publisher("/stopSimulation",
                                               Bool,
                                               queue_size=1)

        # starting the simulation by hand might be a better approach
        time.sleep(2)
        start_signal = Bool()
        start_signal.data = True
        self.pub_start_signal.publish(start_signal)
        time.sleep(2)
        # define DQN algorithm
        tensorflow.reset_default_graph()
        self.RL1 = DeepQNetwork(
            n_actions=len(robot1.action_space),
            n_features=len(robot1.observation_space),
            learning_rate=0.01,
            e_greedy=0.9,
            replace_target_iter=100,
            memory_size=2000,
            e_greedy_increment=0.0008,
        )

        self.total_steps = 0
        self.rsrvl = 0.05  # to check
        self.train()

    def train(self):
        for i_episode in range(600):
            stop_signal = Bool()
            stop_signal.data = True
            self.pub_stop_signal.publish(stop_signal)
            time.sleep(0.2)
            start_signal = Bool()
            start_signal.data = True
            self.pub_start_signal.publish(start_signal)

            observation1 = robot1.observation_space
            observation2 = robot2.observation_space
            ep_r = 0
            while True:
                # restart the simulation
                action1 = self.RL1.choose_action(observation1)  # To check
                # print(action1)
                observation1_, done1 = robot1.step(action1)  # To check
                observation2_, done2 = robot2.step(4)

                x1, y1, z1, vx1, vy1, vz1, theta1_f, theta2_f, theta3_f = observation1_  # To check
                x2, y2, z2, vx2, vy2, vz2, theta1_b, theta2_b, theta3_b = observation2_
                distance = np.sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) *
                                   (y1 - y2))

                ###########################
                if distance < 0.07 or z1 < -5 or distance > 1:
                    done1 = True
                ######################################

                #########reward function############
                reward1 = self.rsrvl + (
                    vx1 + vx2) - 0.5 * (np.abs(vy1) + np.abs(vy2))
                reward = reward1 + (distance < 0.03) - 0.5 * np.abs(y2 - y1)
                #################################
                print("R: ", reward)
                print("distance: ", distance)
                print("z1:", z1)

                self.RL1.store_transition(observation1, action1, reward,
                                          observation1_)

                if self.total_steps > 1000 or done1:
                    self.RL1.learn()

                ep_r += reward
                if done1:
                    print(done1)
                    print('episode: ', i_episode, 'ep_r: ', round(ep_r, 2),
                          ' epsilon: ', round(self.RL1.epsilon, 2))
                    break

                observation1 = observation1_
                observation2 = observation2_
                self.total_steps += 1
                done1 = False
            stop_ = Bool()
            stop_.data = True
            self.pub_stop_signal.publish(stop_)
Example #23
    env = tetrisML.TetrisGame("Training " + test[0], test[1], test[2], test[3])

    MEMORY_SIZE = 100000
    ACTION_SPACE = env.num_actions
    FEATURES = env.num_features
    FEATURESHAPE = env.featureShape
    STATESHAPE = env.stateShape

    sess = tf.Session()

    with tf.variable_scope('Double_DQN'):
        DQN = DeepQNetwork(n_actions=ACTION_SPACE,
                           n_features=FEATURES,
                           memory_size=MEMORY_SIZE,
                           e_greedy_increment=0.0000045,
                           e_greedy=0.9,
                           reward_decay=0.75,
                           output_graph=False,
                           feature_shape=FEATURESHAPE,
                           state_shape=STATESHAPE,
                           learning_rate=2E-6)

    sess.run(tf.global_variables_initializer())

    q_natural = train(DQN)

    print("Evaluating agent...")
    avg_score, avg_length, scoreChange, heuristicChange = testAgent(
        q_natural, test, frames=5000)
    print("avg score (official): %s" % avg_score)
    print("avg game length (frames): %s" % avg_length)
    print("avg score change per frame: %s" % scoreChange)
Example #24
    env_list = []
    env_list2 = []
    for file_path in file_path_list:

        df = pd.read_csv(file_path)
        df = df.sort_values('trade_date', ascending=True)
        df = df.iloc[22:].reset_index(drop=True)  # drop the first few days that lack moving-average data
        env_list.append(stock(df.iloc[0:1500]))
        env_list2.append(stock(df.iloc[1500:].reset_index(drop=True)))

    RL = DeepQNetwork(
        env_list[0].n_actions,
        env_list[0].n_features,
        learning_rate=0.002,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=300,
        memory_size=7000,
        batch_size=256,
        # output_graph=True
    )

    run(env_list, max_round)

    # env = stock(df)
    # env = BackTest(env, show_log=True)
    # env.draw('trade.png', 'profit.png')

    i = 0
    for env in env_list2:
        BackTest(env, show_log=False)
Example #25
actionMap = [15, 25, 35, 45, 55]

#*********************Main**********************
if __name__ == '__main__':
    #get basic equipment lists
    traci.start(sumoCmd)
    tls = traci.trafficlight.getIDList()
    lanes = traci.trafficlight.getControlledLanes(''.join(tls))
    dets = traci.lanearea.getIDList()

    state_space_size = nX
    action_space_size = 5
    RL = DeepQNetwork(action_space_size,
                      state_space_size,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=50,
                      memory_size=200,
                      output_graph=True)
    total_reward = []
    delays = []

    for episode in range(1):
        state = get_states(tls, dets)
        steps = 0
        while steps < 1000:
            action = RL.choose_action(state)
            u = list([actionMap[action]])
            delay1 = vehicle_delay(lanes)
            take_action(u, tls)
            delta_t = round((traci.trafficlight.getNextSwitch('center') -
Example #26
    steps_begin_learn = timesteps * 0.1
    load_model = False

    RL_set = []
    graph_set = []
    sess_set = []
    for i in range(n_agents):
        g = tf.Graph()
        sess = tf.Session(graph=g)

        with sess.as_default():
            with g.as_default():

                RL = DeepQNetwork(
                    n_actions=n_actions,
                    n_features=vector_obs_len,
                    sess=sess,
                    agent_id=i,
                    learning_rate=0.002,
                    reward_decay=0.99,
                    replace_target_iter=5000,
                    memory_size=80000,
                    batch_size=32,
                    save_model_freq=10000,
                    load_model=False,
                )

                RL_set.append(RL)

    # run_this is a single routine that steps all of the agents
    run_this(RL_set, n_episode, steps_begin_learn, learn_freq, n_agents)
Example #27
import gym
import numpy as np
import matplotlib.pyplot as plt
from RL_brain import DeepQNetwork

env = gym.make('MountainCar-v0')
env = env.unwrapped

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

# adjust hyper-parameters here
RL = DeepQNetwork(n_actions=3,
                  n_features=2,
                  learning_rate=0.01,
                  e_greedy=0.9,
                  replace_target_iter=300,
                  memory_size=3000,
                  e_greedy_increment=0.02,
                  output_graph=False)

total_steps = 0
steps_list = list()
record = 0
for i_episode in range(50):

    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)
Example #28
                # RL[1].plot_cost()
                env.plt('clean')
                break

            step = step + 1
        step_of_each_round.append(step)
    plt.ioff()
    for i in range(8):
        RL[i].plot_cost()
    plt.pause(5)
    print(sum(step_of_each_round) / round)
    plt.plot(step_of_each_round)
    plt.pause(0)


if __name__ == "__main__":
    env = env()
    RL = []
    for i in range(8):
        RL.append(DeepQNetwork(n_actions=4, n_features=2,
                               agent_id=i,
                               learning_rate=0.01,
                               reward_decay=0.9,
                               e_greedy=0.9,
                               replace_target_iter=200,
                               memory_size=2000,
                               output_graph=False
                               ))

    run()
Example #29

# end of game

if __name__ == "__main__":
    print("path:" + sys.path[0])
    global r, energy, tlist, RL
    tf.reset_default_graph()
    env = TrainLine(110)
    env.seed(1)
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.0001,
        reward_decay=0.99,  # reward discount
        e_greedy=0.6,  # exploration rate
        replace_target_iter=512,
        memory_size=10000,
        batch_size=256,
        e_greedy_increment=0.35 / 3000,
        # output_graph=True
    )
    #	RL.LoadModel()
    energy = []
    r = []
    tlist = []
    run_train()
    RL.plot_cost()
    plot(r, 'reward')
    plot(energy, 'energy')
    plot(tlist, 'time')
    draw_mean(r, 'reward')
    plt.xlabel('training episodes')
    plt.show()

    env.reset_uav()
    env.render()
    end = time.time()

    print("game over!")
    print('elapsed time:', end - start)
    engine = pyttsx3.init()

    engine.say('program run finished')
    engine.runAndWait()
    env.destory()


Example #30
if __name__ == "__main__":
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.02,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,  # try lowering the replacement interval to 100
        memory_size=2000,  # try enlarging the memory to 6000
        output_graph=False)
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()  # plot the network's loss curve
Example #31
# -*- coding: utf-8 -*-

import gym
from RL_brain import DeepQNetwork

env = gym.make('MountainCar-v0')
env = env.unwrapped

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DeepQNetwork(n_actions=3,
                  n_features=2,
                  learning_rate=0.001,
                  e_greedy=0.9,
                  replace_target_iter=300,
                  memory_size=3000,
                  e_greedy_increment=0.0001)

total_step = 0

for i_episode in range(10):

    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)

        observation_,reward,done,info = env.step(action)

        position,velocity = observation_

        reward = abs(position - (-0.5))
Example #32
            if (step > 200) and (step % 5 == 0):  # learn once every 5 steps
                RL.learn()
            # update the state

            if done or step == MAX_EP_STEPS-1:
                print('This episode is done, start the next episode')
                break
            state = state_
    return best_reward, best_state, reward_his

if __name__ == "__main__":
    len_max = 128
    env = Env(len_max=len_max, n_fe=14, n_classes=6)
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.01,
                      reward_decay=0.8,
                      e_greedy=0.8,
                      replace_target_iter=200,
                      len_max=len_max,
                      memory_size=2000,
                      e_greedy_increment=0.002  # e_greedy increases by 0.002 per step
                      # output_graph=True
                      )
    # env.after(10, run_env)
    best_reward, best_state, reward_his = run_env()
    print(best_state, best_reward)
    # env.mainloop()
    # RL.plot_cost()
    pickle.dump(RL.cost_his, open("cost_his_emotiv", "wb"))
    pickle.dump(reward_his, open("reward_his_emotiv", "wb"))
    pickle.dump(best_state, open("best_state_emotiv", 'wb'))