def return_DQN(name_, fea_num):
    with tf.variable_scope(name_):
        dueling_DQN = DuelingDQN(
            n_actions=ACTION_SPACE, n_features=fea_num, memory_size=MEMORY_SIZE,
            e_greedy_increment=0.001, sess=sess, dueling=True, output_graph=True)
    return dueling_DQN
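# Usage sketch (not part of the original snippet): it assumes `sess`, ACTION_SPACE and
# MEMORY_SIZE are defined as in the neighbouring examples, and the scope names are
# illustrative only. Two independently scoped dueling networks are built, then the
# variables are initialised once.
eval_DQN = return_DQN('dueling_eval', fea_num=3)
other_DQN = return_DQN('dueling_other', fea_num=3)
sess.run(tf.global_variables_initializer())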
from RL_brain import DuelingDQN
import gym
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

env = gym.make('Pendulum-v0')
env = env.unwrapped
env.seed(1)

MEMORY_SIZE = 3000
ACTION_SPACE = 25

sess = tf.Session()
with tf.variable_scope('natural'):
    natural_DQN = DuelingDQN(
        n_actions=ACTION_SPACE, n_features=3, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001, sess=sess, dueling=False)

with tf.variable_scope('dueling'):
    dueling_DQN = DuelingDQN(
        n_actions=ACTION_SPACE, n_features=3, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.001, sess=sess, dueling=True, output_graph=True)

sess.run(tf.global_variables_initializer())
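# For context, a minimal sketch of the dueling head that DuelingDQN builds when
# dueling=True. It illustrates Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)); the layer
# names and sizes are assumptions, not the exact code inside RL_brain.
def dueling_head(hidden, n_actions):
    with tf.variable_scope('Value'):
        V = tf.layers.dense(hidden, 1)            # state value V(s)
    with tf.variable_scope('Advantage'):
        A = tf.layers.dense(hidden, n_actions)    # advantages A(s, a)
    # subtracting the mean advantage keeps V and A identifiable
    return V + (A - tf.reduce_mean(A, axis=1, keepdims=True))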
            break
        step += 1   # total number of steps

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = DuelingDQN(
        env.n_actions,
        env.n_features,            # attributes of the observation/state, e.g. length, width, height
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        dueling=True,
        replace_target_iter=200,   # replace the target_net parameters every 200 steps
        memory_size=2000,          # memory capacity
        # output_graph=True        # whether to write a TensorBoard file
    )
    env.after(100, run_maze)   # run the reinforcement-learning training
    env.mainloop()

    # plot the training-time curve
    his_dueling = np.vstack((episodes, steps))
    file = open('his_dueling', 'wb')
    pickle.dump(his_dueling, file)
    file.close()
    plt.plot(his_dueling[0, :], his_dueling[1, :] - his_dueling[1, 0],
        if total_steps > MEMORY_SIZE:
            RL.learn()

        if total_steps - MEMORY_SIZE > 15000:
            break

        observation = observation_
        total_steps += 1
    return RL.cost_his, acc_r


if __name__ == "__main__":
    dueling_DQN = DuelingDQN(
        n_actions=ACTION_SPACE, n_features=3, learning_rate=0.01, e_greedy=0.9,
        replace_target_iter=100, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.00005, dueling=True)
    natural_DQN = DuelingDQN(
        n_actions=ACTION_SPACE, n_features=3, learning_rate=0.01, e_greedy=0.9,
        replace_target_iter=100, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.00005, dueling=False)

    c_natural, r_natural = train(natural_DQN)
    c_dueling, r_dueling = train(dueling_DQN)
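    # One possible way to compare the two runs; the plotting code is not part of the
    # original snippet and assumes matplotlib.pyplot as plt and numpy as np are imported.
    plt.figure(1)
    plt.plot(np.array(c_natural), c='r', label='natural')
    plt.plot(np.array(c_dueling), c='b', label='dueling')
    plt.legend(loc='best')
    plt.ylabel('cost')
    plt.xlabel('training steps')
    plt.grid()

    plt.figure(2)
    plt.plot(np.array(r_natural), c='r', label='natural')
    plt.plot(np.array(r_dueling), c='b', label='dueling')
    plt.legend(loc='best')
    plt.ylabel('accumulated reward')
    plt.xlabel('training steps')
    plt.grid()

    plt.show()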
from env import environment
from RL_brain import DuelingDQN
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import algorithm_naive_1 as naive

env = environment(number_of_sbs=9)   # specify which environment to use
env_naive = naive.environment(bs_list=env.bs_list)
number = env.number_of_sbs

RL = DuelingDQN(
    n_actions=2 ** number, n_features=4 * number + 2, learning_rate=0.01,
    e_greedy=0.9, replace_target_iter=100, memory_size=2000,
    e_greedy_increment=0.0008, output_graph=True)

total_steps = 0   # step counter
a = pd.DataFrame(columns=['energy_cost'])

plt.figure()
plt.ion()
plt.show()

ep_r_total = []
count_time = 0
energy = []
EE_rate_total = np.zeros(50)
EE_rate_mean = []
counter = 0
mean_min = 10
min_index = 0

for i_episode in range(1000):
    print('iteration is %d' % i_episode)
import os

import tensorflow as tf

from VCM_environment import VCMEN
from RL_brain import DuelingDQN

MEMORY_SIZE = 1000
ACTION_SPACE = 8

if __name__ == "__main__":
    env = VCMEN()
    load_model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
    agent = DuelingDQN(
        n_actions=ACTION_SPACE, n_features=144, memory_size=MEMORY_SIZE,
        environment_name=env.name, dueling=True, load_model_dir=load_model_dir)

    state_t, reward_t, win = env.observe()
    step = 0
    while not win:
        step += 1
        print(state_t)

        # choose
        observation = state_t.flatten()
        action_t = agent.choose_action(observation)

        # act
        env.execute_action(action_t)
        state_t_1, reward_t, win = env.observe()
        state_t = state_t_1
class tensorflow_agent(object):
    def __init__(self, gateway):
        self.gateway = gateway
        self.DuelingDQN = DuelingDQN(actions, 141)
        self.actionMap = ActionMap()
        self.R = 0           # total reward in a round
        self.action = 0
        self.MaxPoint = 120  # max projectile damage (ver 4.10)
        self.SubPoint = 0    # max damage in usual action (ver 4.10)
        self.countProcess = 0
        self.frameData = None
        self.nonDelay = None
        self.currentFrameNum = None
        self.inputKey = None
        self.cc = None
        self.player = None
        self.simulator = None
        self.lastHp_opp = None
        self.lastHp_my = None
        self.isGameJustStarted = None
        self.currentRoundNum = None
        self.isFinishd = None
        self.reward = None
        self.state = []
        self.win = 0         # 1 if the agent won the last round (written to the result CSV)
        self.frame_per_action = self.DuelingDQN.frame_per_action

    def close(self):
        pass

    def getInformation(self, frameData, nonDelay):
        # Getting the frame data of the current frame
        self.frameData = frameData
        self.cc.setFrameData(self.frameData, self.player)
        self.nonDelay = nonDelay
        self.currentFrameNum = nonDelay.getFramesNumber()  # first frame is 14

    # please define this method when you use FightingICE version 3.20 or later
    def roundEnd(self, x, y, z):
        score = (self.nonDelay.getCharacter(not self.player).getHp() /
                 (self.nonDelay.getCharacter(not self.player).getHp() +
                  self.nonDelay.getCharacter(self.player).getHp())) * 1000
        csvList = []
        csvList.append(self.currentRoundNum)
        csvList.append(self.R)
        csvList.append(self.DuelingDQN.epsilon)
        csvList.append(abs(self.nonDelay.getCharacter(self.player).getHp()))
        csvList.append(abs(self.nonDelay.getCharacter(not self.player).getHp()))
        csvList.append(score)
        csvList.append(self.win)
        with open("./saved_networks/resultData.csv", 'a') as f:
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow(csvList)
        # with open('./saved_networks/battleResult.csv', 'a') as file:
        #     file.write("The current step is: " + str(self.brain.session.run(self.brain.timeStep)))
        #     file.write(" frame number: " + str(z) + " p1: " + str(x) + " p2: " + str(y))
        #     file.write("\n")
        print(x)
        print(y)
        print(z)

    def makeResultFile(self):
        if not os.path.exists("./saved_networks/"):
            print("Make directory")
            os.makedirs("./saved_networks/")
        if not os.path.isfile('./saved_networks/resultData.csv'):
            with open('./saved_networks/resultData.csv', 'w') as file:
                file.write('')
            csvList = []
            csvList.append("roundNum")
            csvList.append("R")
            csvList.append("epsilon")
            csvList.append("myHp")
            csvList.append("oppHp")
            csvList.append("score")
            csvList.append("win")
            f = open("./saved_networks/resultData.csv", 'a')
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow(csvList)
            f.close()

    # please define this method when you use FightingICE version 4.00 or later
    def getScreenData(self, sd):
        pass

    def initialize(self, gameData, player):
        # Initializing the command center, the simulator and some other things
        self.inputKey = self.gateway.jvm.struct.Key()
        self.frameData = self.gateway.jvm.struct.FrameData()
        self.cc = self.gateway.jvm.aiinterface.CommandCenter()
        self.player = player
        self.simulator = gameData.getSimulator()
        self.makeResultFile()
        return 0

    def input(self):
        # Return the input for the current frame
        return self.inputKey

    def playAction(self, state):
        self.action = self.DuelingDQN.get_action(state)
        action_name = self.actionMap.actionMap[self.action]
        print("current action is: ", action_name)
        self.cc.commandCall(action_name)

    def getObservation(self):
        my = self.frameData.getCharacter(self.player)
        opp = self.frameData.getCharacter(not self.player)

        myHp = abs(my.getHp() / 500)
        myEnergy = my.getEnergy() / 300
        myX = ((my.getLeft() + my.getRight()) / 2) / 960
        myY = ((my.getBottom() + my.getTop()) / 2) / 640
        mySpeedX = my.getSpeedX() / 15
        mySpeedY = my.getSpeedY() / 28
        myState = my.getAction().ordinal()

        oppHp = abs(opp.getHp() / 500)
        oppEnergy = opp.getEnergy() / 300
        oppX = ((opp.getLeft() + opp.getRight()) / 2) / 960
        oppY = ((opp.getBottom() + opp.getTop()) / 2) / 640
        oppSpeedX = opp.getSpeedX() / 15
        oppSpeedY = opp.getSpeedY() / 28
        oppState = opp.getAction().ordinal()
        oppRemainingFrame = opp.getRemainingFrame() / 70

        observation = []

        observation.append(myHp)
        observation.append(myEnergy)
        observation.append(myX)
        observation.append(myY)
        if mySpeedX < 0:
            observation.append(0)
        else:
            observation.append(1)
        observation.append(abs(mySpeedX))
        if mySpeedY < 0:
            observation.append(0)
        else:
            observation.append(1)
        observation.append(abs(mySpeedY))
        for i in range(56):
            if i == myState:
                observation.append(1)
            else:
                observation.append(0)

        observation.append(oppHp)
        observation.append(oppEnergy)
        observation.append(oppX)
        observation.append(oppY)
        if oppSpeedX < 0:
            observation.append(0)
        else:
            observation.append(1)
        observation.append(abs(oppSpeedX))
        if oppSpeedY < 0:
            observation.append(0)
        else:
            observation.append(1)
        observation.append(abs(oppSpeedY))
        for i in range(56):
            if i == oppState:
                observation.append(1)
            else:
                observation.append(0)
        observation.append(oppRemainingFrame)

        myProjectiles = self.frameData.getProjectilesByP1()
        oppProjectiles = self.frameData.getProjectilesByP2()

        if len(myProjectiles) == 2:
            myHitDamage = myProjectiles[0].getHitDamage() / 200.0
            myHitAreaNowX = ((myProjectiles[0].getCurrentHitArea().getLeft() +
                              myProjectiles[0].getCurrentHitArea().getRight()) / 2) / 960.0
            myHitAreaNowY = ((myProjectiles[0].getCurrentHitArea().getTop() +
                              myProjectiles[0].getCurrentHitArea().getBottom()) / 2) / 640.0
            observation.append(myHitDamage)
            observation.append(myHitAreaNowX)
            observation.append(myHitAreaNowY)
            myHitDamage = myProjectiles[1].getHitDamage() / 200.0
            myHitAreaNowX = ((myProjectiles[1].getCurrentHitArea().getLeft() +
                              myProjectiles[1].getCurrentHitArea().getRight()) / 2) / 960.0
            myHitAreaNowY = ((myProjectiles[1].getCurrentHitArea().getTop() +
                              myProjectiles[1].getCurrentHitArea().getBottom()) / 2) / 640.0
            observation.append(myHitDamage)
            observation.append(myHitAreaNowX)
            observation.append(myHitAreaNowY)
        elif len(myProjectiles) == 1:
            myHitDamage = myProjectiles[0].getHitDamage() / 200.0
            myHitAreaNowX = ((myProjectiles[0].getCurrentHitArea().getLeft() +
                              myProjectiles[0].getCurrentHitArea().getRight()) / 2) / 960.0
            myHitAreaNowY = ((myProjectiles[0].getCurrentHitArea().getTop() +
                              myProjectiles[0].getCurrentHitArea().getBottom()) / 2) / 640.0
            observation.append(myHitDamage)
            observation.append(myHitAreaNowX)
            observation.append(myHitAreaNowY)
            for t in range(3):
                observation.append(0.0)
        else:
            for t in range(6):
                observation.append(0.0)

        if len(oppProjectiles) == 2:
            oppHitDamage = oppProjectiles[0].getHitDamage() / 200.0
            oppHitAreaNowX = ((oppProjectiles[0].getCurrentHitArea().getLeft() +
                               oppProjectiles[0].getCurrentHitArea().getRight()) / 2) / 960.0
            oppHitAreaNowY = ((oppProjectiles[0].getCurrentHitArea().getTop() +
                               oppProjectiles[0].getCurrentHitArea().getBottom()) / 2) / 640.0
            observation.append(oppHitDamage)
            observation.append(oppHitAreaNowX)
            observation.append(oppHitAreaNowY)
            oppHitDamage = oppProjectiles[1].getHitDamage() / 200.0
            oppHitAreaNowX = ((oppProjectiles[1].getCurrentHitArea().getLeft() +
                               oppProjectiles[1].getCurrentHitArea().getRight()) / 2) / 960.0
            oppHitAreaNowY = ((oppProjectiles[1].getCurrentHitArea().getTop() +
                               oppProjectiles[1].getCurrentHitArea().getBottom()) / 2) / 640.0
            observation.append(oppHitDamage)
            observation.append(oppHitAreaNowX)
            observation.append(oppHitAreaNowY)
        elif len(oppProjectiles) == 1:
            oppHitDamage = oppProjectiles[0].getHitDamage() / 200.0
            oppHitAreaNowX = ((oppProjectiles[0].getCurrentHitArea().getLeft() +
                               oppProjectiles[0].getCurrentHitArea().getRight()) / 2) / 960.0
            oppHitAreaNowY = ((oppProjectiles[0].getCurrentHitArea().getTop() +
                               oppProjectiles[0].getCurrentHitArea().getBottom()) / 2) / 640.0
            observation.append(oppHitDamage)
            observation.append(oppHitAreaNowX)
            observation.append(oppHitAreaNowY)
            for t in range(3):
                observation.append(0.0)
        else:
            for t in range(6):
                observation.append(0.0)

        # print(len(observation))  # 141
        # type(observation) -> list
        # return list(map(lambda x: float(x), observation))
        return np.array(observation, dtype=np.float64)

    def makeReward(self, finishRound):
        if finishRound == 0:
            # Defence reward = SubPoint - (currentMyHp - lastMyHp)
            # Attack reward  = currentOppHp - lastOppHp
            self.reward = (self.SubPoint -
                           (abs(self.nonDelay.getCharacter(self.player).getHp()) - self.lastHp_my))
            self.reward += 1 * (abs(self.nonDelay.getCharacter(not self.player).getHp()) - self.lastHp_opp)
            self.R += self.reward
            print("The reward is: ", self.reward)
            return self.reward
        else:
            if abs(self.nonDelay.getCharacter(self.player).getHp()) < abs(
                    self.nonDelay.getCharacter(not self.player).getHp()):
                self.reward = (self.SubPoint -
                               (abs(self.nonDelay.getCharacter(self.player).getHp()) - self.lastHp_my))
                self.reward += 1 * (abs(self.nonDelay.getCharacter(not self.player).getHp()) - self.lastHp_opp)
                self.R += self.reward
                self.win = 1
                return self.MaxPoint
            else:
                self.win = 0
                return 0

    def setLastHp(self):
        self.lastHp_opp = abs(self.nonDelay.getCharacter(not self.player).getHp())
        self.lastHp_my = abs(self.nonDelay.getCharacter(self.player).getHp())

    def ableAction(self):
        if self.nonDelay.getCharacter(self.player).isControl() and self.isFinishd == 0:
            return True
        else:
            return False

    def processing(self):
        try:
            self.frame_per_action -= 1
            if self.frameData.getEmptyFlag() or self.frameData.getRemainingFramesNumber() <= 0:
                self.isGameJustStarted = True
                return
            if not self.isGameJustStarted:
                self.frameData = self.simulator.simulate(self.frameData, self.player, None, None, 17)
            else:
                # this branch runs only once, at the start of a round
                self.isGameJustStarted = False
                self.currentRoundNum = self.frameData.getRound()
                self.R = 0
                self.isFinishd = 0
            if self.cc.getSkillFlag():
                self.inputKey = self.cc.getSkillKey()
                return
            self.inputKey.empty()
            self.cc.skillCancel()
            if self.currentFrameNum == 14:
                self.state = self.getObservation()
                # self.DuelingDQN.setInitState(tuple(state))
                self.setLastHp()
                self.playAction(self.state)
            elif self.currentFrameNum > 3550 and self.isFinishd == 0:
                reward = self.makeReward(1)
                state_ = self.getObservation()
                self.DuelingDQN.store_transition(self.state, self.action, reward, state_)
                self.playAction(state_)
                self.isFinishd = 1
                self.DuelingDQN.learn()
            elif self.ableAction():
                self.DuelingDQN.learn()
                if self.frame_per_action <= 0:
                    reward = self.makeReward(0)
                    state_ = self.getObservation()
                    self.DuelingDQN.store_transition(self.state, self.action, reward, state_)
                    self.setLastHp()
                    self.playAction(state_)
                    self.state = state_
                    print("\n")
                    self.frame_per_action = self.DuelingDQN.frame_per_action
            self.countProcess += 1
        except Exception as e:
            print(e)

    # This part is mandatory
    class Java:
        implements = ["aiinterface.AIInterface"]
        if (step > 200) and (step % 5 == 0):
            RL.learn()

        # swap observation
        observation = observation_

        # break while loop when end of this episode
        if done:
            break
        step += 1

    print('game over')
    env.destroy()


if __name__ == '__main__':
    env = Maze()
    RL = DuelingDQN(
        env.n_actions, env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
            break
        step += 1
        time += 1

    # end of training
    print('Training over')
    env.get_data_info()


if __name__ == "__main__":
    if len(sys.argv) == 1:
        num = 0
    else:
        num = int(sys.argv[1])
    env = elev_sys(num=num, oddeven=False)
    if num / 2 < 1:
        # RL = DeepQNetwork(9, len(env._step(0, 0)[0]), batch_size=64, e_greedy_increment=0.001)
        RL = DuelingDQN(9, len(env._step(0, 0)[0]), memory_size=10000,
                        dueling=False, e_greedy_increment=0.00005)
    else:
        RL = DuelingDQN(9, len(env._step(0, 0)[0]), memory_size=10000,
                        dueling=True, e_greedy_increment=0.0001)
    # RL.load(3)
    train(env, RL, num=num)
    RL.save(num)
from env import environment
from RL_brain import DuelingDQN
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import algorithm_naive_1 as naive
import sys

step = 200
env = environment(number_of_sbs=3, random=False)
temp = env.bs_list.copy()
# specify which environment to use
env_naive = naive.environment(bs_list=temp)
number = env.number_of_sbs

RL = DuelingDQN(
    n_actions=2 ** number, n_features=4 * number + 2, learning_rate=0.01,
    e_greedy=1, replace_target_iter=100, memory_size=2000,
    e_greedy_increment=0.0008, output_graph=True)

total_steps = 0   # step counter
a = pd.DataFrame(columns=['energy_cost'])
ep_r_total = []
count_time = 0
energy = []
energy_naive = []
EE_rate_total = np.zeros(step)
EE_rate_mean = []
counter = 0
mean_min = 10
min_index = 0
            # swap observation
            observation = observation_
            step += 1

            # break while loop when end of this episode
            if done:
                break

        scores.append(env.score)
        if episode % 5 == 0:
            print("#" * 80)
            print(episode, ",", int(step / 10), ",score:", env.score, ",e:", RL.epsilon)
            print("avg-score: {}".format(np.mean(list(scores)[-1500:])))
        if episode % 100 == 0:
            print(observation)
            env.show()


if __name__ == "__main__":
    env = Game()
    RL = DuelingDQN(env.n_actions, env.n_features,
                    learning_rate=1e-4, reward_decay=0.95, e_greedy=0.99,
                    start_epsilon=0.5, e_greedy_increment=1e-5)
    train_2048()
import tensorflow as tf
import matplotlib.pyplot as plt

from RL_brain import DuelingDQN
from VCM_environment import VCMEN

N_EPOCHS = 1500
MEMORY_SIZE = 500
ACTION_SPACE = 8

if __name__ == "__main__":
    env = VCMEN()
    agent = DuelingDQN(
        n_actions=ACTION_SPACE, n_features=144, memory_size=MEMORY_SIZE,
        environment_name=env.name, e_greedy_increment=0.01, dueling=True)

    win_cnt = 0
    acc_r = [0]
    for foo in range(N_EPOCHS):
        step = 0
        env.reset()
        state_t, reward_t, win = env.observe()
        while True:
            step += 1

            # choose
            observation = state_t.flatten()
            action_t = agent.choose_action(observation)

            # act
from env import environment
from RL_brain import DuelingDQN
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

env = environment()   # specify which environment to use

RL1 = DuelingDQN(n_actions=2, n_features=6, learning_rate=0.01, e_greedy=0.9,
                 replace_target_iter=100, memory_size=2000,
                 e_greedy_increment=0.0008, output_graph=True)
RL2 = DuelingDQN(n_actions=2, n_features=6, learning_rate=0.01, e_greedy=0.9,
                 replace_target_iter=100, memory_size=2000,
                 e_greedy_increment=0.0008, output_graph=True)
RL3 = DuelingDQN(n_actions=2, n_features=6, learning_rate=0.01, e_greedy=0.9,
                 replace_target_iter=100, memory_size=2000,
                 e_greedy_increment=0.0008,
            action_ = action * 50 + 300   # action = state * press_coefficient
            state_, reward, done = env.step(action_)
            if not done:
                reward += 0.05 * (tmp + 1)
            RL.store_transition(state, action, np.float64(reward), state_)
            if done:
                print('...... the jumper died')
                RL.learn()
                env.touch_the_restart()
                break
            tmp += 1
            max_ = max(max_, tmp)
            state = state_
        print('Longest streak of your Alpha jump-jump agent so far:', max_, 'jumps')


env = Env()

if __name__ == '__main__':
    with tf.Session() as sess:
        with tf.variable_scope('dueling'):
            RL = DuelingDQN(n_actions=14, n_features=1, memory_size=5000000,
                            e_greedy_increment=0.0001, sess=sess, dueling=True,
                            output_graph=True)
        sess.run(tf.global_variables_initializer())
        tf.app.run()
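# A tiny illustrative check, not part of the original snippet. It only assumes what
# the line `action_ = action * 50 + 300` above already states: the 14 discrete
# actions map linearly onto press values 300 .. 950.
for a in range(14):
    print('action', a, '-> press value', a * 50 + 300)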