Example #1
def run_episode(algorithm,agent,act_rmp,move_rmp,PASS_COUNT,paused):
    restart()
    
    # grab the play-area frame for the nets and the HUD frame for HP parsing
    # ("station" is this codebase's spelling of "state")
    act_station = cv2.resize(cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB), (WIDTH, HEIGHT))
    act_hp_station = cv2.cvtColor(cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)), cv2.COLOR_BGR2GRAY)

    act_boss_hp = boss_hp(act_hp_station, 570)  # 570: initial "last HP" seed (presumably the boss's full HP)
    act_boss_last_hp = act_boss_hp
    act_self_hp = player_hp(act_hp_station)
    min_hp = 9  # lowest player HP seen so far; 9 = full health


    move_station = cv2.resize(cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),(WIDTH,HEIGHT))
    move_hp_station = cv2.cvtColor(cv2.resize(grab_screen(window_size),(HP_WIDTH,HP_HEIGHT)),cv2.COLOR_BGR2GRAY)

    move_boss_hp = boss_hp(move_hp_station, 570)
    move_boss_last_hp = move_boss_hp
    move_self_hp = player_hp(move_hp_station)



    step = 0
    done = 0
    total_reward = 0



    start_time = time.time()
    # delayed-reward buffers ("Deley" spelling kept from the project's constants)
    DeleyReward = collections.deque(maxlen=DELEY_REWARD)
    DeleyStation = collections.deque(maxlen=DELEY_REWARD)
    DeleyActions = collections.deque(maxlen=DELEY_REWARD)

    # current move direction of the player: 0 = stay, 1 = left, 2 = right
    direction = 0
    while True:
        
        # player HP bar is not in its normal state and the left pixels are not black
        if act_hp_station[40][95] != 56 and act_hp_station[300][30] > 20 and act_hp_station[200][30] > 20:
            print("Not in game yet 1")
            act_hp_station = cv2.cvtColor(cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)), cv2.COLOR_BGR2GRAY)
            continue

        # the boss HP bar has not appeared yet
        if act_hp_station[401][98] == 0:
            print("Not in game yet 2")
            act_hp_station = cv2.cvtColor(cv2.resize(grab_screen(window_size), (HP_WIDTH, HP_HEIGHT)), cv2.COLOR_BGR2GRAY)
            continue

        last_time = time.time()
        # no more than 10 mins
        # if time.time() - start_time > 600:
        #     break

        step += 1

        
        actions = agent.act_sample(act_station)

        # execute action in action seq
        for action in actions:
            d = agent.move_sample(move_station)
            # print("Move:", move_name[d] )

            # only send a movement command when the direction changes
            if d == direction:
                pass
            elif d == 0:
                Tool.Actions.Nothing()
            elif d == 1:
                Tool.Actions.Move_Left()
            elif d == 2:
                Tool.Actions.Move_Right()

            take_action(action)

            # print("Action: ", action_name[action])

            next_move_station = cv2.resize(cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),(WIDTH,HEIGHT))
            next_move_hp_station = cv2.cvtColor(cv2.resize(grab_screen(window_size),(HP_WIDTH,HP_HEIGHT)),cv2.COLOR_BGR2GRAY)

            next_move_boss_hp = boss_hp(next_move_hp_station, move_boss_last_hp)
            move_boss_last_hp = move_boss_hp
            next_move_self_hp = player_hp(next_move_hp_station)

            # treat an HP reading of 1 while still at full health as a misread; restore 9
            if min_hp == 9 and next_move_self_hp == 1:
                next_move_self_hp = 9

            reward, done, min_hp = Tool.Helper.action_judge(move_boss_hp, next_move_boss_hp,move_self_hp, next_move_self_hp, min_hp)
            # print(reward)

            move_rmp.append((move_station, d, reward, next_move_station,done))



            if done in (1, 2):  # 1: player died, 2: boss cleared
                Tool.Actions.Nothing()
                break

            move_station = next_move_station
            move_self_hp = next_move_self_hp
            move_boss_hp = next_move_boss_hp
            direction = d

        if done == 1:
            Tool.Actions.Nothing()
            break
        elif done == 2:
            PASS_COUNT += 1
            Tool.Actions.Nothing()
            break


        next_act_station = cv2.resize(cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),(WIDTH,HEIGHT))
        next_act_hp_station = cv2.cvtColor(cv2.resize(grab_screen(window_size),(HP_WIDTH,HP_HEIGHT)),cv2.COLOR_BGR2GRAY)

        next_act_boss_hp = boss_hp(next_act_hp_station, act_boss_last_hp)

        act_boss_last_hp = act_boss_hp

        next_act_self_hp = player_hp(next_act_hp_station)

        if min_hp == 9 and next_act_self_hp == 1:
            next_act_self_hp = 9


        reward, done, min_hp = Tool.Helper.action_judge(act_boss_hp, next_act_boss_hp,act_self_hp, next_act_self_hp, min_hp)
        DeleyReward.append(reward)
        DeleyStation.append(act_station)
        DeleyActions.append(actions)
        reward = mean(DeleyReward)
        # print("reward: ",reward,"self_hp: ",next_act_self_hp,"boss_hp: ",next_act_boss_hp)

        if len(DeleyReward) >= DELEY_REWARD:
            act_rmp.append((DeleyStation[0],DeleyActions[0],reward,DeleyStation[1],done))
        
        total_reward += reward
        paused = Tool.Helper.pause_game(paused)

        if done == 1:
            Tool.Actions.Nothing()
            break
        elif done == 2:
            PASS_COUNT += 1
            Tool.Actions.Nothing()
            break

        act_station = next_act_station
        act_self_hp = next_act_self_hp
        act_boss_hp = next_act_boss_hp

    if (len(move_rmp) > MEMORY_WARMUP_SIZE):
        print("move learning")
        batch_station, batch_moves, batch_reward, batch_next_station, batch_done = move_rmp.sample(BATCH_SIZE)
        algorithm.move_learn(batch_station, batch_moves, batch_reward, batch_next_station, batch_done)

    if (len(act_rmp) > MEMORY_WARMUP_SIZE):
        print("act learning")
        batch_station,batch_actions,batch_reward,batch_next_station,batch_done = act_rmp.sample(BATCH_SIZE)
        algorithm.act_learn(batch_station,batch_actions,batch_reward,batch_next_station,batch_done)

    return total_reward, step, PASS_COUNT
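
Note: this example treats act_rmp and move_rmp as replay memories exposing append(transition), len(), and sample(BATCH_SIZE). The project's buffer class is not shown on this page, so the snippet below is a minimal stand-in written under those assumptions; the class name ReplayMemory and the column-wise return shape are guesses, not the original API.

import collections
import random


class ReplayMemory:
    """Minimal ring-buffer replay memory matching the interface used above."""

    def __init__(self, max_size):
        self.buffer = collections.deque(maxlen=max_size)

    def append(self, transition):
        # transition: (station, action, reward, next_station, done)
        self.buffer.append(transition)

    def sample(self, batch_size):
        # uniform random batch, returned column-wise (one tuple per field)
        batch = random.sample(list(self.buffer), batch_size)
        stations, actions, rewards, next_stations, dones = zip(*batch)
        return stations, actions, rewards, next_stations, dones

    def __len__(self):
        return len(self.buffer)

Example #2 below calls append with five positional arguments and splits transitions into "correct"/"wrong" buffers, so its memory class has a different signature than this sketch.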
Example #2
def run_episode(hp, algorithm, agent, act_rmp_correct, act_rmp_wrong,
                move_rmp_correct, move_rmp_wrong, PASS_COUNT, paused):
    restart()
    # run one round of learning while the game reloads
    for i in range(1):
        if (len(move_rmp_correct) > MEMORY_WARMUP_SIZE):
            # print("move learning")
            batch_station, batch_actions, batch_reward, batch_next_station, batch_done = move_rmp_correct.sample(
                BATCH_SIZE)
            algorithm.move_learn(batch_station, batch_actions, batch_reward,
                                 batch_next_station, batch_done)

        if (len(act_rmp_correct) > MEMORY_WARMUP_SIZE):
            # print("action learning")
            batch_station, batch_actions, batch_reward, batch_next_station, batch_done = act_rmp_correct.sample(
                BATCH_SIZE)
            algorithm.act_learn(batch_station, batch_actions, batch_reward,
                                batch_next_station, batch_done)
        if (len(move_rmp_wrong) > MEMORY_WARMUP_SIZE):
            # print("move learning")
            batch_station, batch_actions, batch_reward, batch_next_station, batch_done = move_rmp_wrong.sample(
                BATCH_SIZE)
            algorithm.move_learn(batch_station, batch_actions, batch_reward,
                                 batch_next_station, batch_done)

        if (len(act_rmp_wrong) > MEMORY_WARMUP_SIZE):
            # print("action learning")
            batch_station, batch_actions, batch_reward, batch_next_station, batch_done = act_rmp_wrong.sample(
                BATCH_SIZE)
            algorithm.act_learn(batch_station, batch_actions, batch_reward,
                                batch_next_station, batch_done)

    step = 0
    done = 0
    total_reward = 0

    start_time = time.time()
    # delayed-reward buffers ("Deley" spelling kept from the project's constants)
    DeleyMoveReward = collections.deque(maxlen=DELEY_REWARD)
    DeleyActReward = collections.deque(maxlen=DELEY_REWARD)
    DeleyStation = collections.deque(maxlen=DELEY_REWARD +
                                     1)  # 1 more for next_station
    DeleyActions = collections.deque(maxlen=DELEY_REWARD)
    DeleyDirection = collections.deque(maxlen=DELEY_REWARD)

    # wait until the memory-read HP values look like a live fight
    while True:
        boss_hp_value = hp.get_boss_hp()
        self_hp = hp.get_self_hp()
        if 800 < boss_hp_value <= 900 and 1 <= self_hp <= 9:
            break

    thread1 = FrameBuffer(1,
                          "FrameBuffer",
                          WIDTH,
                          HEIGHT,
                          maxlen=FRAMEBUFFERSIZE)
    thread1.start()

    last_hornet_y = 0
    while True:
        step += 1
        # last_time = time.time()
        # no more than 10 mins
        # if time.time() - start_time > 600:
        #     break

        # block until the frame buffer has collected enough frames
        while len(thread1.buffer) < FRAMEBUFFERSIZE:
            time.sleep(0.1)

        stations = thread1.get_buffer()
        boss_hp_value = hp.get_boss_hp()
        self_hp = hp.get_self_hp()
        player_x, player_y = hp.get_play_location()
        hornet_x, hornet_y = hp.get_hornet_location()
        soul = hp.get_souls()

        move, action = agent.sample(stations, soul)

        take_direction(move)
        take_action(action)

        next_station = thread1.get_buffer()
        next_boss_hp_value = hp.get_boss_hp()
        next_self_hp = hp.get_self_hp()
        next_player_x, next_player_y = hp.get_play_location()
        next_hornet_x, next_hornet_y = hp.get_hornet_location()
        hornet_skill1 = False
        # a y-coordinate pinned in this narrow band marks Hornet's skill-1 wind-up
        if 32 < last_hornet_y < 32.5 and 32 < hornet_y < 32.5:
            hornet_skill1 = True
        last_hornet_y = hornet_y
        # get reward
        move_reward = Tool.Helper.move_judge(self_hp, next_self_hp, player_x,
                                             next_player_x, hornet_x,
                                             next_hornet_x, move,
                                             hornet_skill1)

        act_reward, done = Tool.Helper.action_judge(boss_hp_value,
                                                    next_boss_hp_value,
                                                    self_hp, next_self_hp,
                                                    next_player_x,
                                                    next_hornet_x, action)
        # print(reward)
        # print( action_name[action], ", ", move_name[d], ", ", reward)

        DeleyMoveReward.append(move_reward)
        DeleyActReward.append(act_reward)
        DeleyStation.append(stations)
        DeleyActions.append(action)
        DeleyDirection.append(move)

        if len(DeleyStation) >= DELEY_REWARD + 1:
            if DeleyMoveReward[0] > 0:
                move_rmp_correct.append(DeleyStation[0], DeleyDirection[0],
                                        DeleyMoveReward[0], DeleyStation[1],
                                        done)
            if DeleyMoveReward[0] < 0:
                move_rmp_wrong.append(DeleyStation[0], DeleyDirection[0],
                                      DeleyMoveReward[0], DeleyStation[1],
                                      done)

        if len(DeleyStation) >= DELEY_REWARD + 1:
            if mean(DeleyActReward) > 0:
                act_rmp_correct.append(DeleyStation[0], DeleyActions[0],
                                       mean(DeleyActReward), DeleyStation[1],
                                       done)
            if mean(DeleyActReward) < 0:
                act_rmp_wrong.append(DeleyStation[0], DeleyActions[0],
                                     mean(DeleyActReward), DeleyStation[1],
                                     done)

        station = next_station
        self_hp = next_self_hp
        boss_hp_value = next_boss_hp_value

        # if (len(act_rmp) > MEMORY_WARMUP_SIZE and int(step/ACTION_SEQ) % LEARN_FREQ == 0):
        #     print("action learning")
        #     batch_station,batch_actions,batch_reward,batch_next_station,batch_done = act_rmp.sample(BATCH_SIZE)
        #     algorithm.act_learn(batch_station,batch_actions,batch_reward,batch_next_station,batch_done)

        total_reward += act_reward
        paused = Tool.Helper.pause_game(paused)

        if done == 1:
            Tool.Actions.Nothing()
            break
        elif done == 2:
            PASS_COUNT += 1
            Tool.Actions.Nothing()
            break

    thread1.stop()

    for i in range(1):
        if (len(move_rmp_correct) > MEMORY_WARMUP_SIZE):
            # print("move learning")
            batch_station, batch_actions, batch_reward, batch_next_station, batch_done = move_rmp_correct.sample(
                BATCH_SIZE)
            algorithm.move_learn(batch_station, batch_actions, batch_reward,
                                 batch_next_station, batch_done)

        if (len(act_rmp_correct) > MEMORY_WARMUP_SIZE):
            # print("action learning")
            batch_station, batch_actions, batch_reward, batch_next_station, batch_done = act_rmp_correct.sample(
                BATCH_SIZE)
            algorithm.act_learn(batch_station, batch_actions, batch_reward,
                                batch_next_station, batch_done)
        if (len(move_rmp_wrong) > MEMORY_WARMUP_SIZE):
            # print("move learning")
            batch_station, batch_actions, batch_reward, batch_next_station, batch_done = move_rmp_wrong.sample(
                BATCH_SIZE)
            algorithm.move_learn(batch_station, batch_actions, batch_reward,
                                 batch_next_station, batch_done)

        if (len(act_rmp_wrong) > MEMORY_WARMUP_SIZE):
            # print("action learning")
            batch_station, batch_actions, batch_reward, batch_next_station, batch_done = act_rmp_wrong.sample(
                BATCH_SIZE)
            algorithm.act_learn(batch_station, batch_actions, batch_reward,
                                batch_next_station, batch_done)

    return total_reward, step, PASS_COUNT
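
Note: this example (and #3) relies on a FrameBuffer thread exposing a buffer deque plus start/get_buffer/stop. Its implementation is not shown on this page; below is a minimal sketch under stated assumptions — the grab callable parameter, the grab interval, and the daemon flag are additions made so the sketch is self-contained, not the project's actual signature.

import collections
import threading
import time

import cv2


class FrameBuffer(threading.Thread):
    """Background frame grabber; sketch of the interface used above."""

    def __init__(self, thread_id, name, width, height, maxlen, grab, interval=0.05):
        super().__init__(name=name, daemon=True)
        self.thread_id = thread_id
        self.width = width
        self.height = height
        self.buffer = collections.deque(maxlen=maxlen)
        self.grab = grab          # zero-arg callable returning an RGB frame
        self.interval = interval  # seconds between grabs; tune to the game
        self._running = True

    def run(self):
        while self._running:
            frame = cv2.resize(self.grab(), (self.width, self.height))
            self.buffer.append(frame)
            time.sleep(self.interval)

    def get_buffer(self):
        # snapshot of the newest frames, oldest first
        return list(self.buffer)

    def stop(self):
        self._running = False
        self.join()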
Example #3
def run_episode(algorithm,agent,act_rmp,move_rmp,PASS_COUNT,paused):
    restart()
    
    # run one round of learning while the game reloads
    for i in range(1):
        if (len(move_rmp) > MEMORY_WARMUP_SIZE):
            # print("move learning")
            batch_station,batch_actions,batch_reward,batch_next_station,batch_done = move_rmp.sample(BATCH_SIZE)
            algorithm.move_learn(batch_station,batch_actions,batch_reward,batch_next_station,batch_done)   

        if (len(act_rmp) > MEMORY_WARMUP_SIZE):
            # print("action learning")
            batch_station,batch_actions,batch_reward,batch_next_station,batch_done = act_rmp.sample(BATCH_SIZE)
            algorithm.act_learn(batch_station,batch_actions,batch_reward,batch_next_station,batch_done)




    hp_station = cv2.cvtColor(cv2.resize(grab_screen(window_size),(HP_WIDTH,HP_HEIGHT)),cv2.COLOR_BGR2GRAY)

    boss_hp_value = boss_hp(hp_station, 570)  # 570: initial "last HP" seed (presumably the boss's full HP)
    boss_last_hp = boss_hp_value
    self_hp = player_hp(hp_station)
    min_hp = 9  # lowest player HP seen so far; 9 = full health


    step = 0
    done = 0
    total_reward = 0


    # start_time = time.time()
    # delayed-reward buffers ("Deley" spelling kept from the project's constants)
    DeleyReward = collections.deque(maxlen=DELEY_REWARD)
    DeleyStation = collections.deque(maxlen=DELEY_REWARD)
    DeleyActions = collections.deque(maxlen=DELEY_REWARD)
    DeleyDirection = collections.deque(maxlen=DELEY_REWARD)
    
    thread1 = FrameBuffer(1, "FrameBuffer", WIDTH, HEIGHT, maxlen=FRAMEBUFFERSIZE)
    thread1.start()
    # move direction of the player: 0 = stay, 1 = left, 2 = right
    while True:
        
        # player HP bar is not in its normal state and the left pixels are not black
        if hp_station[40][95] != 56 and hp_station[300][30] > 20 and hp_station[200][30] > 20:
            # print("Not in game yet 1")
            hp_station = cv2.cvtColor(cv2.resize(grab_screen(window_size),(HP_WIDTH,HP_HEIGHT)),cv2.COLOR_BGR2GRAY)
            continue
        
        # the boss HP bar has not appeared yet
        if hp_station[401][98] == 0:
            # print("Not in game yet 2")
            hp_station = cv2.cvtColor(cv2.resize(grab_screen(window_size),(HP_WIDTH,HP_HEIGHT)),cv2.COLOR_BGR2GRAY)
            continue

        # last_time = time.time()
        # no more than 10 mins
        # if time.time() - start_time > 600:
        #     break
        while(len(thread1.buffer) < FRAMEBUFFERSIZE):
            print(len(thread1.buffer))
            time.sleep(0.1)
        stations = thread1.get_buffer()

        d = agent.move_sample(stations)
        action = agent.act_sample(stations)
        step += 1

        # print("Move:", move_name[d] )
        # thread2 = TackAction(2, "ActionThread", d, action)
        # thread2.start()
        take_direction(d)
        take_action(action)


        next_station = cv2.resize(cv2.cvtColor(grab_screen(station_size), cv2.COLOR_RGBA2RGB),(WIDTH,HEIGHT))
        next_hp_station = cv2.cvtColor(cv2.resize(grab_screen(window_size),(HP_WIDTH,HP_HEIGHT)),cv2.COLOR_BGR2GRAY)

        next_boss_hp_value = boss_hp(next_hp_station, boss_last_hp)
        boss_last_hp = boss_hp_value
        next_self_hp = player_hp(next_hp_station)

        # treat an HP reading of 1 while still at full health as a misread; restore 9
        if min_hp == 9 and next_self_hp == 1:
            next_self_hp = 9

        reward, done, min_hp = Tool.Helper.action_judge(boss_hp_value, next_boss_hp_value, self_hp, next_self_hp, min_hp)
        # print(reward)
        # print(action_name[action], ", ", move_name[d], ", ", reward)

        DeleyReward.append(reward)
        DeleyStation.append(stations)
        DeleyActions.append(action)
        DeleyDirection.append(d)

        # print(mean(DeleyReward))


        if len(DeleyReward) >= DELEY_REWARD:
            move_rmp.append((DeleyStation[0],DeleyDirection[0],mean(DeleyReward),DeleyStation[1],done))
            act_rmp.append((DeleyStation[0],DeleyActions[0],mean(DeleyReward),DeleyStation[1],done))

        station = next_station
        self_hp = next_self_hp
        boss_hp_value = next_boss_hp_value
            

        # if (len(act_rmp) > MEMORY_WARMUP_SIZE and int(step/ACTION_SEQ) % LEARN_FREQ == 0):
        #     print("action learning")
        #     batch_station,batch_actions,batch_reward,batch_next_station,batch_done = act_rmp.sample(BATCH_SIZE)
        #     algorithm.act_learn(batch_station,batch_actions,batch_reward,batch_next_station,batch_done)

        total_reward += reward
        paused = Tool.Helper.pause_game(paused)

        if done == 1:
            Tool.Actions.Nothing()
            break
        elif done == 2:
            PASS_COUNT += 1
            Tool.Actions.Nothing()
            break


    thread1.stop()
    for i in range(2):
        if (len(move_rmp) > MEMORY_WARMUP_SIZE):
            # print("move learning")
            batch_station, batch_moves, batch_reward, batch_next_station, batch_done = move_rmp.sample(BATCH_SIZE)
            algorithm.move_learn(batch_station, batch_moves, batch_reward, batch_next_station, batch_done)

        if (len(act_rmp) > MEMORY_WARMUP_SIZE):
            # print("action learning")
            batch_station,batch_actions,batch_reward,batch_next_station,batch_done = act_rmp.sample(BATCH_SIZE)
            algorithm.act_learn(batch_station,batch_actions,batch_reward,batch_next_station,batch_done)

    return total_reward, step, PASS_COUNT
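
Note: all three examples share the same delayed-reward bookkeeping: nothing is written to the replay memory until the deques fill up, and the oldest buffered state/action pair is then credited with the reward averaged over the following DELEY_REWARD steps, with the next buffered frame serving as its next state. A stripped-down rendering of that idea (the window length and statistics.mean are illustrative; the examples size their action deque one element shorter, while this sketch keeps state and action aligned):

import collections
from statistics import mean

DELEY_REWARD = 5  # window length; illustrative, not the project's value

# +1 so the entry after the oldest one can serve as its "next state"
states = collections.deque(maxlen=DELEY_REWARD + 1)
actions = collections.deque(maxlen=DELEY_REWARD + 1)
rewards = collections.deque(maxlen=DELEY_REWARD)


def record_step(memory, state, action, reward, done):
    """Buffer one step; once the window is full, emit the oldest step
    credited with the mean reward of the steps that followed it."""
    states.append(state)
    actions.append(action)
    rewards.append(reward)
    if len(states) == DELEY_REWARD + 1:
        memory.append((states[0], actions[0], mean(rewards), states[1], done))

Here memory can be any object with an append method, e.g. the ReplayMemory sketched after Example #1.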