Example #1
 def __init__(self,
              num_opponents: int,
              num_teammates: int,
              port: int = 6000):
     # Game Interface:
     self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                              num_teammates=num_teammates,
                                              port=port)
     self.game_interface.connect_to_server()
     # Features Interface:
     self.features = PlasticFeatures(num_op=num_opponents,
                                     num_team=num_teammates)
     # Actions Interface:
     self.actions = Actions(num_team=num_teammates,
                            features=self.features,
                            game_interface=self.game_interface)
     # Agent instance:
     self.agent = DQNAgent(num_features=self.features.num_features,
                           num_actions=self.actions.get_num_actions(),
                           learning_rate=0.005,
                           discount_factor=0.99,
                           epsilon=1,
                           final_epsilon=0.001,
                           epsilon_decay=0.99997,
                           tau=0.125)
     # Auxiliary attributes
     self.starting_pos_list = list(STARTING_POSITIONS.values())
     self.num_ep = 0
Example #2
 def __init__(self, num_opponents: int, num_teammates: int,
              port: int = 6000):
     # Game Interface:
     self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                              num_teammates=num_teammates,
                                              port=port)
     self.game_interface.connect_to_server()
     # Features Interface:
     self.features = PlasticFeatures(num_op=num_opponents,
                                     num_team=num_teammates)
     # Actions Interface:
     self.actions = Actions(num_team=num_teammates, features=self.features,
                            game_interface=self.game_interface)
     # Auxiliary attributes
     self.starting_pos_list = list(STARTING_POSITIONS.values())
     self.num_ep = 0
Example #3
 def __init__(self,
              num_opponents: int,
              num_teammates: int,
              port: int = 6000):
     # Game Interface:
     self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                              num_teammates=num_teammates,
                                              port=port)
     # Features Interface:
     self.features = PlasticFeatures(num_op=num_opponents,
                                     num_team=num_teammates)
     # Actions Interface:
     self.actions = Actions(num_team=num_teammates,
                            features=self.features,
                            game_interface=self.game_interface)
     # Agent instance:
     self.agent = DQNAgent(num_features=self.features.num_features,
                           num_actions=self.actions.get_num_actions(),
                           learning_rate=0.005,
                           discount_factor=0.99,
                           epsilon=1,
                           final_epsilon=0.001,
                           epsilon_decay=0.99997,
                           tau=0.125)
Example #4
class Player:
    def __init__(self,
                 num_opponents: int,
                 num_teammates: int,
                 port: int = 6000):
        # Game Interface:
        self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                                 num_teammates=num_teammates,
                                                 port=port)
        self.game_interface.connect_to_server()
        # Features Interface:
        self.features = PlasticFeatures(num_op=num_opponents,
                                        num_team=num_teammates)
        # Actions Interface:
        self.actions = Actions(num_team=num_teammates,
                               features=self.features,
                               game_interface=self.game_interface)
        # Agent instance:
        self.agent = DQNAgent(num_features=self.features.num_features,
                              num_actions=self.actions.get_num_actions(),
                              learning_rate=0.005,
                              discount_factor=0.99,
                              epsilon=1,
                              final_epsilon=0.001,
                              epsilon_decay=0.99997,
                              tau=0.125)
        # Auxiliary attributes
        self.starting_pos_list = list(STARTING_POSITIONS.values())
        self.num_ep = 0

    def get_reward(self, game_status: int, correct_action: bool) -> int:
        reward = 0
        if game_status == GOAL:
            reward += 1000
        elif game_status in [CAPTURED_BY_DEFENSE, OUT_OF_BOUNDS, OUT_OF_TIME]:
            reward -= 1000
        else:
            if correct_action:
                reward += 1
            else:
                reward -= 1
        return reward

    def parse_episode(self,
                      episodes_transitions: List[Transition],
                      verbose: bool = False) -> list:
        if len(episodes_transitions) == 0:
            return []

        # Remove last actions without ball:
        last_reward = copy(episodes_transitions[-1].reward)
        num_transitions = len(episodes_transitions)
        for idx in range(num_transitions - 1, -1, -1):
            # Has ball:
            if episodes_transitions[idx].obs[5] == 10:
                break
            # No ball:
            elif episodes_transitions[idx].obs[5] == -10:
                pass
            else:
                raise ValueError("Features has ball, wrong value!!")
        else:
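            # Loop finished without break: no transition had the ball; keep them all.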
            idx = num_transitions

        episodes_transitions = episodes_transitions[:idx + 1]
        # If the last selected action was wrong, do not keep a positive reward:
        if episodes_transitions[-1].correct_action is False and last_reward > 0:
            episodes_transitions[-1].reward = -1
        else:
            episodes_transitions[-1].reward = last_reward
        episodes_transitions[-1].done = True

        if verbose and random.random() > 0.95:
            print("\n ** Transictions:")
            for el in episodes_transitions:
                print_transiction(el.to_tuple(), self.actions)
            print('**')

        return episodes_transitions

    def set_starting_game_conditions(self,
                                     start_with_ball: bool = True,
                                     start_pos: tuple = None,
                                     starts_fixed_position: bool = True,
                                     verbose: bool = False):
        """
        Set the starting game conditions, e.g. move the agent to its initial position.
        """
        if start_with_ball:
            if starts_fixed_position:
                if not start_pos:
                    aux_idx = self.num_ep % len(self.starting_pos_list)
                    start_pos = self.starting_pos_list[aux_idx]
                self.actions.dribble_to_pos(start_pos, stop=True)
                if verbose:
                    print(f"[START GAME] Ball; {start_pos}")
            else:
                while not self.features.has_ball():
                    self.actions.move_to_ball()
                if verbose:
                    print(f"[START GAME] Ball; RANDOM")

        else:
            if starts_fixed_position:
                if not start_pos:
                    ball_pos: list = list(self.features.get_ball_coord())
                    starting_corners = get_vertices_around_ball(ball_pos)
                    start_pos = random.choice(starting_corners)
                self.actions.move_to_pos(start_pos)
                if verbose:
                    print(f"[START GAME] NO Ball; {start_pos}")
            else:
                # Start in current position
                if verbose:
                    print(f"[START GAME] NO Ball; RANDOM")
                pass
        # Inform the other players that this agent is ready to start:
        self.game_interface.hfo.say(settings.PLAYER_READY_MSG)

    def test(self,
             num_episodes: int,
             start_with_ball: bool = True,
             starts_fixed_position: bool = True) -> float:
        """
        @param num_episodes: number of episodes to run
        @param start_with_ball: whether the agent starts in possession of the ball
        @param starts_fixed_position: whether the agent starts from a fixed position
        @return: (float) the win rate
        """
        # metrics variables:
        _num_wins = 0
        self.num_ep = 0
        for ep in range(num_episodes):
            # Check if server still running:
            try:
                self.game_interface.check_server_is_up()
            except ServerDownError:
                print("!!SERVER DOWN!! TEST {}/{}".format(ep, num_episodes))
                avr_win_rate = round(_num_wins / (ep + 1), 2)
                print("[TEST: Summary] WIN rate = {};".format(avr_win_rate))
                return avr_win_rate

            # Update features:
            self.features.update_features(
                self.game_interface.get_observation())

            # Go to origin position:
            print(f"\n[TEST] {ep}/{num_episodes}")
            self.set_starting_game_conditions(
                start_with_ball=start_with_ball,
                start_pos=None,
                starts_fixed_position=starts_fixed_position,
                verbose=True)

            # Start learning loop
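            # Only log the chosen action verbosely when it differs from the previous one: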
            prev_act = -1
            while self.game_interface.in_game():
                # Update environment features:
                features_array = self.features.get_features().copy()
                # Act:
                act = self.agent.exploit_actions(features_array)
                if act != prev_act:
                    self.actions.execute_action(act, verbose=True)
                else:
                    self.actions.execute_action(act)
                prev_act = act

            # Update auxiliary variables:
            if self.game_interface.scored_goal():
                print("[GOAL]")
                _num_wins += 1
            else:
                print("[FAIL]")
            # Game Reset
            self.game_interface.reset()
            self.num_ep += 1
        avr_win_rate = round(_num_wins / num_episodes, 2)
        print("[TEST: Summary] WIN rate = {};".format(avr_win_rate))
        return avr_win_rate

    def train(self,
              num_train_episodes: int,
              num_total_train_ep: int,
              starts_fixed_position: bool = True,
              start_with_ball: bool = True):
        """
        @param num_train_episodes: number of episodes to train in this iteration
        @param num_total_train_ep: total number of episodes to train
        @param starts_fixed_position: whether the agent starts from a fixed position
        @param start_with_ball: whether the agent starts in possession of the ball
        @raise ServerDownError
        @return: (float) the average epsilon over the trained episodes, or None if the server went down
        """
        # metrics variables:
        _num_wins = 0
        _sum_epsilons = 0
        self.num_ep = 0
        for ep in range(num_train_episodes):
            # Check if server still running:
            try:
                self.game_interface.check_server_is_up()
            except ServerDownError:
                print("!!SERVER DOWN!! TRAIN {}/{}".format(
                    ep, num_train_episodes))
                return
            # Update features:
            self.features.update_features(
                self.game_interface.get_observation())

            # Go to origin position:
            self.set_starting_game_conditions(
                start_with_ball=start_with_ball,
                start_pos=None,
                starts_fixed_position=starts_fixed_position)

            # Start learning loop
            status = IN_GAME
            episode_buffer = list()
            while self.game_interface.in_game():
                # Update environment features:
                features_array = self.features.get_features()

                # Act:
                act = self.agent.act(features_array)
                status, correct_action = self.actions.execute_action(act)

                # Every step: record the transition in the episode buffer and train the main network
                done = not self.game_interface.in_game()
                # Store transition:
                # (obs, action, reward, new obs, done?)
                transition = Transition(obs=features_array,
                                        act=act,
                                        reward=self.get_reward(
                                            status, correct_action),
                                        new_obs=self.features.get_features(),
                                        done=done,
                                        correct_action=correct_action)
                episode_buffer.append(transition)

                # Train:
                self.agent.train(terminal_state=done)

            if self.game_interface.scored_goal() or status == GOAL:
                if episode_buffer[-1].reward != 1000:
                    raise Exception("Last action reward is wrong!")
                _num_wins += 1

            # Add episodes:
            episode_buffer = self.parse_episode(episode_buffer, verbose=True)
            self.agent.store_episode(episode_buffer)
            # Update auxiliary variables:
            _sum_epsilons += self.agent.epsilon
            # Update Agent:
            self.agent.restart(num_total_train_ep)
            # Game Reset
            self.game_interface.reset()
            self.num_ep += 1
        avr_epsilon = round(_sum_epsilons / num_train_episodes, 3)
        print("[TRAIN: Summary] WIN rate = {}; AVR epsilon = {}".format(
            _num_wins / num_train_episodes, avr_epsilon))
        return avr_epsilon
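
For context, a driver for the Player class above might look like the sketch below. It is only an illustration: the module name "player", the episode counts, and the alternation between train() and test() blocks are assumptions, and it presumes an HFO server is already listening on the chosen port.

# Minimal driver sketch. Assumptions: the Player class is importable from a
# module named "player", an HFO server is already running on port 6000, and
# the episode counts below are illustrative only.
from player import Player


def main():
    # One opponent, no teammates; the port must match the running HFO server.
    player = Player(num_opponents=1, num_teammates=0, port=6000)

    num_iterations = 10        # illustrative value
    train_ep_per_iter = 100    # illustrative value
    test_ep_per_iter = 20      # illustrative value

    for it in range(num_iterations):
        # Train for a block of episodes, then measure the win rate:
        avr_epsilon = player.train(
            num_train_episodes=train_ep_per_iter,
            num_total_train_ep=num_iterations * train_ep_per_iter,
            starts_fixed_position=True,
            start_with_ball=True)
        win_rate = player.test(
            num_episodes=test_ep_per_iter,
            start_with_ball=True,
            starts_fixed_position=True)
        print("[ITERATION {}] avr_epsilon={}; win_rate={}".format(
            it, avr_epsilon, win_rate))


if __name__ == '__main__':
    main()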