def restart():
    station_size = (230, 230, 1670, 930)

    # wait until the check pixel at (187, 300) goes black, i.e. the loading
    # screen is gone
    while True:
        station = cv2.resize(
            cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),
            (1000, 500))
        if station[187][300][0] != 0:
            time.sleep(1)
        else:
            break

    time.sleep(1)
    Look_up()
    time.sleep(1.5)
    Look_up()
    time.sleep(1)

    # keep tapping Look_up until the check pixel at (612, 187) reads bright,
    # then press C to confirm the challenge
    while True:
        station = cv2.resize(
            cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),
            (1000, 500))
        if station[187][612][0] > 200:
            # PressKey(DOWN_ARROW)
            # time.sleep(0.1)
            # ReleaseKey(DOWN_ARROW)
            PressKey(C)
            time.sleep(0.1)
            ReleaseKey(C)
            break
        else:
            Look_up()
            time.sleep(0.2)
# Alternative restart(): same pixel checks, longer waits, and Short_Jump()
# as the confirm input instead of pressing C.
def restart():
    station_size = (230, 230, 1670, 930)
    while True:
        station = cv2.resize(
            cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),
            (1000, 500))
        if station[187][300][0] != 0:
            time.sleep(1)
        else:
            break
    time.sleep(1)
    Look_up()
    time.sleep(2.5)
    Look_up()
    time.sleep(1)
    while True:
        station = cv2.resize(
            cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),
            (1000, 500))
        if station[187][612][0] > 200:
            Short_Jump()
            break
        else:
            Look_up()
            time.sleep(0.5)
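# ---------------------------------------------------------------------------
# Hedged sketch: the input helpers used by restart() (PressKey/ReleaseKey,
# Look_up, Short_Jump, and the C key constant) are defined elsewhere in this
# repo. The stand-in below uses pydirectinput with string key names instead
# of the repo's key-code constants, and the 'z' jump binding is an assumption;
# it only illustrates the assumed behavior, not the actual implementation.
# ---------------------------------------------------------------------------
import time
import pydirectinput  # DirectInput-compatible key events


def PressKey(key):
    # key is a pydirectinput key name here, e.g. 'c'
    pydirectinput.keyDown(key)


def ReleaseKey(key):
    pydirectinput.keyUp(key)


def Look_up(hold=0.1):
    # tap the up arrow, used to focus the challenge prompt
    PressKey('up')
    time.sleep(hold)
    ReleaseKey('up')


def Short_Jump(hold=0.1):
    # brief tap of the jump key ('z' is an assumed binding)
    PressKey('z')
    time.sleep(hold)
    ReleaseKey('z')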
def run_episode(algorithm, agent, act_rmp, move_rmp, PASS_COUNT, paused):
    restart()

    # initial captures for the action branch
    act_station = cv2.resize(
        cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),
        (WIDTH, HEIGHT))
    act_hp_station = cv2.cvtColor(
        cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)),
        cv2.COLOR_BGR2GRAY)
    act_boss_hp = boss_hp(act_hp_station, 570)  # 570: initial "last hp" reading
    act_boss_last_hp = act_boss_hp
    act_self_hp = player_hp(act_hp_station)
    min_hp = 9

    # initial captures for the movement branch
    move_station = cv2.resize(
        cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),
        (WIDTH, HEIGHT))
    move_hp_station = cv2.cvtColor(
        cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)),
        cv2.COLOR_BGR2GRAY)
    move_boss_hp = boss_hp(move_hp_station, 570)
    move_boss_last_hp = move_boss_hp
    move_self_hp = player_hp(move_hp_station)

    step = 0
    done = 0
    total_reward = 0
    start_time = time.time()

    # delayed-reward buffers
    DeleyReward = collections.deque(maxlen=DELEY_REWARD)
    DeleyStation = collections.deque(maxlen=DELEY_REWARD)
    DeleyActions = collections.deque(maxlen=DELEY_REWARD)

    # move direction of player: 0 for stay, 1 for left, 2 for right
    direction = 0

    while True:
        # player hp bar is not in normal state and the left pixels are not black
        if (act_hp_station[40][95] != 56
                and act_hp_station[300][30] > 20
                and act_hp_station[200][30] > 20):
            print("Not in game yet 1")
            # refresh the capture before re-checking, otherwise this spins
            # forever on a stale frame
            act_hp_station = cv2.cvtColor(
                cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)),
                cv2.COLOR_BGR2GRAY)
            continue
        # there is no boss hp bar yet
        if act_hp_station[401][98] == 0:
            print("Not in game yet 2")
            act_hp_station = cv2.cvtColor(
                cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)),
                cv2.COLOR_BGR2GRAY)
            continue

        last_time = time.time()
        # no more than 10 mins
        # if time.time() - start_time > 600:
        #     break

        step += 1
        actions = agent.act_sample(act_station)

        # execute each action in the sampled action sequence
        for action in actions:
            d = agent.move_sample(move_station)
            # print("Move:", move_name[d])
            if d == direction:
                pass  # already moving that way
            elif d == 0:
                Tool.Actions.Nothing()
            elif d == 1:
                Tool.Actions.Move_Left()
            elif d == 2:
                Tool.Actions.Move_Right()
            take_action(action)
            # print("Action: ", action_name[action])

            next_move_station = cv2.resize(
                cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),
                (WIDTH, HEIGHT))
            next_move_hp_station = cv2.cvtColor(
                cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)),
                cv2.COLOR_BGR2GRAY)
            next_move_boss_hp = boss_hp(next_move_hp_station, move_boss_last_hp)
            move_boss_last_hp = move_boss_hp
            next_move_self_hp = player_hp(next_move_hp_station)
            # guard: while min_hp is still 9, a sudden reading of 1 is treated
            # as a misread full bar
            if min_hp == 9 and next_move_self_hp == 1:
                next_move_self_hp = 9
            reward, done, min_hp = Tool.Helper.action_judge(
                move_boss_hp, next_move_boss_hp,
                move_self_hp, next_move_self_hp, min_hp)
            # print(reward)
            move_rmp.append((move_station, d, reward, next_move_station, done))

            if done in (1, 2):
                Tool.Actions.Nothing()
                break

            move_station = next_move_station
            move_self_hp = next_move_self_hp
            move_boss_hp = next_move_boss_hp
            direction = d

        if done == 1:
            Tool.Actions.Nothing()
            break
        elif done == 2:
            PASS_COUNT += 1
            Tool.Actions.Nothing()
            break

        next_act_station = cv2.resize(
            cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),
            (WIDTH, HEIGHT))
        next_act_hp_station = cv2.cvtColor(
            cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)),
            cv2.COLOR_BGR2GRAY)
        next_act_boss_hp = boss_hp(next_act_hp_station, act_boss_last_hp)
        act_boss_last_hp = act_boss_hp
        next_act_self_hp = player_hp(next_act_hp_station)
        # same full-bar guard as the movement branch
        if min_hp == 9 and next_act_self_hp == 1:
            next_act_self_hp = 9
        reward, done, min_hp = Tool.Helper.action_judge(
            act_boss_hp, next_act_boss_hp,
            act_self_hp, next_act_self_hp, min_hp)

        DeleyReward.append(reward)
        DeleyStation.append(act_station)
        DeleyActions.append(actions)
        reward = mean(DeleyReward)
        # print("reward: ", reward, "self_hp: ", next_act_self_hp, "boss_hp: ", next_act_boss_hp)
        if len(DeleyReward) >= DELEY_REWARD:
            act_rmp.append((DeleyStation[0], DeleyActions[0], reward,
                            DeleyStation[1], done))

        total_reward += reward
        paused = Tool.Helper.pause_game(paused)

        if done == 1:
            Tool.Actions.Nothing()
            break
        elif done == 2:
            PASS_COUNT += 1
            Tool.Actions.Nothing()
            break

        act_station = next_act_station
        act_self_hp = next_act_self_hp
        act_boss_hp = next_act_boss_hp

    # one learning pass per memory at the end of the episode
    if len(move_rmp) > MEMORY_WARMUP_SIZE:
        print("move learning")
        batch_station, batch_directions, batch_reward, batch_next_station, batch_done = \
            move_rmp.sample(BATCH_SIZE)
        algorithm.move_learn(batch_station, batch_directions, batch_reward,
                             batch_next_station, batch_done)
    if len(act_rmp) > MEMORY_WARMUP_SIZE:
        print("act learning")
        batch_station, batch_actions, batch_reward, batch_next_station, batch_done = \
            act_rmp.sample(BATCH_SIZE)
        algorithm.act_learn(batch_station, batch_actions, batch_reward,
                            batch_next_station, batch_done)

    return total_reward, step, PASS_COUNT
# paused = pause_game(paused)
# for i in range(5):
#     print(1)
#     take_direction(directions[i])
#     take_action(actions[i])
# for x in os.listdir("./act_memory"):
#     print(1)

window_size = (0, 0, 1920, 1017)
station_size = (230, 230, 1670, 930)
WIDTH = 768
HEIGHT = 407

hp_station = cv2.cvtColor(
    cv2.resize(grab_screen(station_size), (WIDTH, HEIGHT)),
    cv2.COLOR_BGR2GRAY)
boss_blood = boss_hp(hp_station, 570)
last_hp = boss_blood
next_self_blood = player_hp(hp_station)
min_hp = 9
check_point = (612, 187)

start_time = time.time()
for i in range(10):
    hp_station = cv2.cvtColor(
        cv2.resize(grab_screen(station_size), (WIDTH, HEIGHT)),
        cv2.COLOR_BGR2GRAY)
    fn = "./test_img/" + str(i) + ".png"
    cv2.imwrite(fn, hp_station)
    time.sleep(0.02)
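# ---------------------------------------------------------------------------
# Hedged sketch: boss_hp() and player_hp() read hp straight off pixels of the
# grayscale capture, with the second boss_hp argument acting as a fallback
# "last known" value (570 above suggests a full boss bar reading). The rows,
# columns, spacing, and thresholds below are illustrative placeholders only;
# the repo's real readers use their own calibrated coordinates.
# ---------------------------------------------------------------------------
def boss_hp(hp_station, last_hp, row=401, start_col=98, max_width=570):
    # count bright pixels along one row of the boss hp bar
    width = 0
    for col in range(start_col, start_col + max_width):
        if hp_station[row][col] > 50:  # assumed brightness threshold
            width += 1
    # an empty reading usually means the bar is obscured; keep the last value
    return width if width > 0 else last_hp


def player_hp(hp_station, row=40, first_col=95, spacing=25):
    # count lit masks: sample one pixel per mask slot, at most 9 masks
    hp = 0
    for i in range(9):
        if hp_station[row][first_col + i * spacing] > 50:
            hp += 1
    return hp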
def run_episode(algorithm, agent, act_rmp, move_rmp, PASS_COUNT, paused):
    restart()

    # one learning pass per memory before the episode starts
    if len(move_rmp) > MEMORY_WARMUP_SIZE:
        # print("move learning")
        batch_station, batch_directions, batch_reward, batch_next_station, batch_done = \
            move_rmp.sample(BATCH_SIZE)
        algorithm.move_learn(batch_station, batch_directions, batch_reward,
                             batch_next_station, batch_done)
    if len(act_rmp) > MEMORY_WARMUP_SIZE:
        # print("action learning")
        batch_station, batch_actions, batch_reward, batch_next_station, batch_done = \
            act_rmp.sample(BATCH_SIZE)
        algorithm.act_learn(batch_station, batch_actions, batch_reward,
                            batch_next_station, batch_done)

    hp_station = cv2.cvtColor(
        cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)),
        cv2.COLOR_BGR2GRAY)
    boss_hp_value = boss_hp(hp_station, 570)  # 570: initial "last hp" reading
    boss_last_hp = boss_hp_value
    self_hp = player_hp(hp_station)
    min_hp = 9

    step = 0
    done = 0
    total_reward = 0
    # start_time = time.time()

    # delayed-reward buffers
    DeleyReward = collections.deque(maxlen=DELEY_REWARD)
    DeleyStation = collections.deque(maxlen=DELEY_REWARD)
    DeleyActions = collections.deque(maxlen=DELEY_REWARD)
    DeleyDirection = collections.deque(maxlen=DELEY_REWARD)

    # background thread keeps the most recent frames for the agent
    thread1 = FrameBuffer(1, "FrameBuffer", WIDTH, HEIGHT, maxlen=FRAMEBUFFERSIZE)
    thread1.start()

    # move direction of player: 0 for stay, 1 for left, 2 for right
    while True:
        # player hp bar is not in normal state and the left pixels are not black
        if (hp_station[40][95] != 56
                and hp_station[300][30] > 20
                and hp_station[200][30] > 20):
            # print("Not in game yet 1")
            hp_station = cv2.cvtColor(
                cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)),
                cv2.COLOR_BGR2GRAY)
            continue
        # there is no boss hp bar yet
        if hp_station[401][98] == 0:
            # print("Not in game yet 2")
            hp_station = cv2.cvtColor(
                cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)),
                cv2.COLOR_BGR2GRAY)
            continue

        # last_time = time.time()
        # no more than 10 mins
        # if time.time() - start_time > 600:
        #     break

        # wait until the frame buffer is full
        while len(thread1.buffer) < FRAMEBUFFERSIZE:
            print(len(thread1.buffer))
            time.sleep(0.1)

        stations = thread1.get_buffer()
        d = agent.move_sample(stations)
        action = agent.act_sample(stations)
        step += 1
        # print("Move:", move_name[d])
        # thread2 = TackAction(2, "ActionThread", d, action)
        # thread2.start()
        take_direction(d)
        take_action(action)

        next_station = cv2.resize(
            cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),
            (WIDTH, HEIGHT))
        next_hp_station = cv2.cvtColor(
            cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)),
            cv2.COLOR_BGR2GRAY)
        next_boss_hp_value = boss_hp(next_hp_station, boss_last_hp)
        boss_last_hp = boss_hp_value
        next_self_hp = player_hp(next_hp_station)
        # guard: while min_hp is still 9, a sudden reading of 1 is treated
        # as a misread full bar
        if min_hp == 9 and next_self_hp == 1:
            next_self_hp = 9
        reward, done, min_hp = Tool.Helper.action_judge(
            boss_hp_value, next_boss_hp_value, self_hp, next_self_hp, min_hp)
        # print(reward)
        # print(action_name[action], ", ", move_name[d], ", ", reward)

        DeleyReward.append(reward)
        DeleyStation.append(stations)
        DeleyActions.append(action)
        DeleyDirection.append(d)
        # print(mean(DeleyReward))
        if len(DeleyReward) >= DELEY_REWARD:
            move_rmp.append((DeleyStation[0], DeleyDirection[0],
                             mean(DeleyReward), DeleyStation[1], done))
            act_rmp.append((DeleyStation[0], DeleyActions[0],
                            mean(DeleyReward), DeleyStation[1], done))

        station = next_station
        self_hp = next_self_hp
        boss_hp_value = next_boss_hp_value

        # if (len(act_rmp) > MEMORY_WARMUP_SIZE and int(step / ACTION_SEQ) % LEARN_FREQ == 0):
        #     print("action learning")
        #     batch_station, batch_actions, batch_reward, batch_next_station, batch_done = act_rmp.sample(BATCH_SIZE)
        #     algorithm.act_learn(batch_station, batch_actions, batch_reward, batch_next_station, batch_done)

        total_reward += reward
        paused = Tool.Helper.pause_game(paused)

        if done == 1:
            Tool.Actions.Nothing()
            break
        elif done == 2:
            PASS_COUNT += 1
            Tool.Actions.Nothing()
            break

    thread1.stop()

    # two learning passes per memory at the end of the episode
    for i in range(2):
        if len(move_rmp) > MEMORY_WARMUP_SIZE:
            # print("move learning")
            batch_station, batch_directions, batch_reward, batch_next_station, batch_done = \
                move_rmp.sample(BATCH_SIZE)
            algorithm.move_learn(batch_station, batch_directions, batch_reward,
                                 batch_next_station, batch_done)
        if len(act_rmp) > MEMORY_WARMUP_SIZE:
            # print("action learning")
            batch_station, batch_actions, batch_reward, batch_next_station, batch_done = \
                act_rmp.sample(BATCH_SIZE)
            algorithm.act_learn(batch_station, batch_actions, batch_reward,
                                batch_next_station, batch_done)

    return total_reward, step, PASS_COUNT
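# ---------------------------------------------------------------------------
# Hedged sketch: FrameBuffer is a capture thread that keeps the last few
# resized frames in a deque and exposes .buffer, get_buffer(), and stop().
# The constructor signature matches the call above; everything else, including
# reusing the module-level grab_screen and station_size, is an assumed minimal
# implementation, not the repo's actual class.
# ---------------------------------------------------------------------------
import threading
import collections
import cv2


class FrameBuffer(threading.Thread):
    def __init__(self, thread_id, name, width, height, maxlen=4):
        super().__init__()
        self.thread_id = thread_id
        self.name = name
        self.width = width
        self.height = height
        self.buffer = collections.deque(maxlen=maxlen)
        self._running = True

    def run(self):
        # keep grabbing and resizing frames until stop() is called
        while self._running:
            frame = cv2.resize(
                cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),
                (self.width, self.height))
            self.buffer.append(frame)

    def get_buffer(self):
        # snapshot of the most recent frames, oldest first
        return list(self.buffer)

    def stop(self):
        self._running = False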