def move_agent(action_name, game_interface: HFOAttackingPlayer,
               features: DiscreteFeatures1TeammateV1):
    # print("move_agent!")
    if "SHORT" in action_name:
        num_repetitions = 10
    elif "LONG" in action_name:
        num_repetitions = 20
    else:
        raise ValueError("ACTION NAME is WRONG")

    # Get Movement type:
    if "MOVE" in action_name:
        action = MOVE_TO
    elif "DRIBBLE" in action_name:
        action = DRIBBLE_TO
    else:
        raise ValueError("ACTION NAME is WRONG")

    if "UP" in action_name:
        action = (action, features.agent_coord[0], -0.9)
    elif "DOWN" in action_name:
        action = (action, features.agent_coord[0], 0.9)
    elif "LEFT" in action_name:
        action = (action, -0.8, features.agent_coord[1])
    elif "RIGHT" in action_name:
        action = (action, 0.8, features.agent_coord[1])
    else:
        raise ValueError("ACTION NAME is WRONG")

    # Default to the current game status/observation in case the loop
    # body never runs (e.g., the game already ended):
    status = game_interface.get_game_status()
    observation = game_interface.get_observation_array()
    attempts = 0
    while game_interface.in_game() and attempts < num_repetitions:
        status, observation = game_interface.step(action, features.has_ball())
        features.update_features(observation)
        attempts += 1
    return status, observation
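The action name encodes the duration ("SHORT"/"LONG"), the movement type ("MOVE"/"DRIBBLE"), and the direction ("UP"/"DOWN"/"LEFT"/"RIGHT"). A minimal usage sketch; the action name below is hypothetical, since the parsing only requires that the string contain those keywords:

# Hypothetical action name; any string containing the keywords above works:
status, observation = move_agent("SHORT_DRIBBLE_UP", game_interface, features)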
Example #2
 def kick_to_pos(self, pos: tuple, features: DiscreteFeatures1TeammateV1,
                 game_interface: HFOAttackingPlayer):
     """ The agent kicks to position expected """
     hfo_action = (KICK_TO, pos[0], pos[1], 2)
     status, observation = game_interface.step(hfo_action,
                                               features.has_ball())
     # Update features:
     features.update_features(observation)
Example #3
def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
         actions: DiscreteActions1TeammateV1, reward_funct) -> float:
    """
    @param num_episodes: number of episodes to run
    @param game_interface: game interface, that manages interactions
    between both;
    @param features: features interface, from the observation array, gets the
    main features for the agent;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (float) the win rate
    """
    # Run test episodes:
    num_goals = 0
    for ep in range(num_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            print("Server is down while testing; episode={}".format(ep))
            break
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Test loop:
        debug_counter = 0  # TODO remove
        reward = 0  # default, in case the episode ends before any step
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            print("Agent playing {}".format(action_name))

            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)

            # Compute reward:
            reward = reward_funct(status)
        num_goals += 1 if reward == 1 else 0

        if game_interface.get_game_status() == OUT_OF_TIME:
            if debug_counter < 5:
                raise NoActionPlayedError(
                    "agent was only able to choose {}".format(debug_counter))
        # Game Reset
        game_interface.reset()
    print("<<TEST>> NUM Goals = ", num_goals)
    print("<<TEST>> NUM episodes = ", (ep + 1))
    print("<<TEST>> AVR win rate = ", num_goals / (ep + 1))
    # Use the number of episodes actually played; the loop may stop early
    # if the server goes down:
    return num_goals / (ep + 1)
Example #4
 def move_to_pos(self, pos: tuple, features: DiscreteFeatures1TeammateV1,
                 game_interface: HFOAttackingPlayer):
     """ The agent keeps moving until reach the position expected """
     curr_pos = features.get_pos_tuple(round_ndigits=1)
     while pos != curr_pos:
         hfo_action = (MOVE_TO, pos[0], pos[1])
         status, observation = game_interface.step(hfo_action,
                                                   features.has_ball())
         # Update features:
         features.update_features(observation)
         curr_pos = features.get_pos_tuple(round_ndigits=1)

def execute_action(action_name, game_interface: HFOAttackingPlayer,
                   features: DiscreteFeatures1TeammateV1):
    if action_name == "KICK_TO_GOAL":
        status, observation = shoot_ball(game_interface, features)
    elif action_name == "PASS":
        status, observation = pass_ball(game_interface, features)
    elif "MOVE" in action_name or "DRIBBLE" in action_name:
        status, observation = move_agent(action_name, game_interface, features)
    elif action_name == "NOOP":
        status, observation = do_nothing(game_interface, features)
    else:
        raise ValueError("Action Wrong name")
    features.update_features(observation)
    return status
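For context, a minimal sketch of a single decision step driving execute_action, mirroring the train/test loops elsewhere in this listing (all names as defined there):

# One agent decision step (names as in the train/test loops):
curr_state_id = features.get_state_index()
action_idx = agent.act(curr_state_id)
action_name = actions.map_action_to_str(action_idx, features.has_ball())
status = execute_action(action_name=action_name,
                        features=features,
                        game_interface=game_interface)
reward = reward_funct(status)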

def go_to_origin_position(game_interface: HFOAttackingPlayer,
                          features: DiscreteFeatures1TeammateV1,
                          actions: DiscreteActions1TeammateV1,
                          pos_name: str = None):
    if pos_name:
        origin_pos = ORIGIN_POSITIONS[pos_name]
    else:
        pos_name, origin_pos = random.choice(list(ORIGIN_POSITIONS.items()))
    # print("\nMoving to starting point: {0}".format(pos_name))
    pos = features.get_pos_tuple(round_ndigits=1)
    while origin_pos != pos:
        has_ball = features.has_ball()
        hfo_action: tuple = actions.dribble_to_pos(origin_pos)
        status, observation = game_interface.step(hfo_action, has_ball)
        features.update_features(observation)
        pos = features.get_pos_tuple(round_ndigits=1)
    # Informs the teammate that it is ready to start the game
    teammate_last_coord = features.teammate_coord.copy()
    counter = 0
    while teammate_last_coord.tolist() == features.teammate_coord.tolist():
        if counter >= 10:
            # print("STOP repeating the message")
            break
        game_interface.hfo.say(settings.PLAYER_READY_MSG)
        game_interface.hfo.step()
        observation = game_interface.hfo.getState()
        features.update_features(observation)
        # print("Action said READY!")
        counter += 1

def pass_ball(game_interface: HFOAttackingPlayer,
              features: DiscreteFeatures1TeammateV1):
    # print("pass_ball!")
    # Default to the current status/observation in case the loop body
    # never runs:
    status = game_interface.get_game_status()
    observation = game_interface.get_observation_array()
    attempts = 0
    while game_interface.in_game() and features.has_ball():
        if attempts > 2:
            break
        elif attempts == 2:
            # Failed to pass 2 times
            print("Failed to PASS two times. WILL KICK")
            y = random.choice([0.17, 0, -0.17])
            hfo_action = (KICK_TO, 0.9, y, 2)
        else:
            hfo_action = (PASS, 11)
        status, observation = game_interface.step(hfo_action,
                                                  features.has_ball())
        features.update_features(observation)
        attempts += 1
    return status, observation

def shoot_ball(game_interface: HFOAttackingPlayer,
               features: DiscreteFeatures1TeammateV1):
    # print("shoot_ball!")
    # Default to the current status/observation in case the loop body
    # never runs:
    status = game_interface.get_game_status()
    observation = game_interface.get_observation_array()
    attempts = 0
    while game_interface.in_game() and features.has_ball():
        if attempts > 3:
            break
        elif attempts == 3:
            # Failed to shoot 3 times; fall back to a plain kick
            print("Failed to SHOOT 3 times. WILL KICK")
            y = random.choice([0.17, 0, -0.17])
            hfo_action = (KICK_TO, 0.9, y, 2)
        else:
            hfo_action = (SHOOT, )
        status, observation = game_interface.step(hfo_action,
                                                  features.has_ball())
        features.update_features(observation)
        attempts += 1
    return status, observation
Example #9
 def shoot_ball(self, game_interface: HFOAttackingPlayer,
                features: DiscreteFeatures1TeammateV1):
     """ Tries to shoot, if it fail, kicks to goal randomly """
     attempts = 0
     while game_interface.in_game() and features.has_ball():
         if attempts > 3:
             break
         elif attempts == 3:
             # Failed to shoot 3 times; fall back to a plain kick
             # print("Failed to SHOOT 3 times. WILL KICK")
             y = 0  # TODO random.choice([0.17, 0, -0.17])
             hfo_action = (KICK_TO, 0.9, y, 2)
         else:
             hfo_action = (SHOOT,)
         _, obs = game_interface.step(hfo_action, features.has_ball())
         features.update_features(obs)
         attempts += 1
     return game_interface.get_game_status(), \
         game_interface.get_observation_array()
Example #10
 def pass_ball(self, game_interface: HFOAttackingPlayer,
               features: DiscreteFeatures1TeammateV1):
     """ Tries to use the PASS action, if it fails, Kicks in the direction
     of the teammate"""
     attempts = 0
     while game_interface.in_game() and features.has_ball():
         if attempts > 2:
             break
         elif attempts == 2:
             # Failed to pass 2 times
             # print("Failed to PASS two times. WILL KICK")
             hfo_action = (KICK_TO, features.teammate_coord[0],
                           features.teammate_coord[1], 1.5)
         else:
             hfo_action = (PASS, 11)
         _, obs = game_interface.step(hfo_action, features.has_ball())
         features.update_features(obs)
         attempts += 1
     return game_interface.get_game_status(), \
         game_interface.get_observation_array()
Example #11
 def execute_action(self, action_idx: int,
                    game_interface: HFOAttackingPlayer,
                    features: DiscreteFeatures1TeammateV1):
     """ Receiving the idx of the action, the agent executes it and
     returns the game status """
     action_name = self.map_action_to_str(action_idx, features.has_ball())
     # KICK/SHOOT to goal
     if action_name == "KICK_TO_GOAL":
         status, observation = self.shoot_ball(game_interface, features)
     # PASS ball to teammate
     elif action_name == "PASS":
         status, observation = self.pass_ball(game_interface, features)
     # MOVE/DRIBBLE
     elif "MOVE" in action_name or "DRIBBLE" in action_name:
         status, observation = self.move_agent(action_name, game_interface,
                                               features)
     # DO NOTHING
     elif action_name == "NOOP":
         status, observation = self.do_nothing(game_interface, features)
     else:
         raise ValueError("Action Wrong name")
     # Update Features:
     features.update_features(observation)
     return status
Example #12
def train(num_train_episodes: int, num_total_train_ep: int,
          game_interface: HFOAttackingPlayer,
          features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
          actions: DiscreteActions1TeammateV1, reward_funct):
    """
    @param num_train_episodes: number of episodes to train in this iteration
    @param num_total_train_ep: number total of episodes to train
    @param game_interface: game interface, that manages interactions
    between both;
    @param features: features interface, from the observation array, gets
    the main features for the agent;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (QLearningAgentV5) the agent
    """
    sum_score = 0
    sum_epsilons = 0
    agent.counter_explorations = 0
    agent.counter_exploitations = 0
    for ep in range(num_train_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            raise ServerDownError("training; episode={}".format(ep))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Start learning loop
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            # print("Agent playing {} for {}".format(action_name, num_rep))

            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)

            # Update environment features:
            reward = reward_funct(status)
            sum_score += reward
            new_state_id = features.get_state_index()
            agent.store_ep(state_idx=curr_state_id,
                           action_idx=action_idx,
                           reward=reward,
                           next_state_idx=new_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
        if game_interface.get_game_status() == OUT_OF_TIME:
            if debug_counter < 5:
                raise NoActionPlayedError(
                    "agent was only able to choose {}".format(debug_counter))
        agent.learn_buffer()
        agent.update_hyper_parameters(num_total_episodes=num_total_train_ep)
        sum_epsilons += agent.epsilon
        # Game Reset
        game_interface.reset()
    print("<<TRAIN>> AVR reward = ", sum_score / num_train_episodes)
    print("<<TRAIN>> %Explorations={}% ".format(
        round(
            (agent.counter_explorations /
             (agent.counter_exploitations + agent.counter_explorations)), 4) *
        100))
Example #13
    model_file = args.model_file
    # Directory
    save_dir = args.save_dir or mkdir(
        num_episodes, num_op, extra_note="retrain")

    # Initialize connection with the HFO server
    hfo_interface = HFOAttackingPlayer(num_opponents=num_op,
                                       num_teammates=num_team)
    hfo_interface.connect_to_server()
    print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
          "num_episodes={};".format(hfo_interface.hfo.getUnum(), num_op,
                                    num_team, num_episodes))

    # Agent set-up
    reward_function = basic_reward
    features_manager = DiscreteFeatures1TeammateV1(num_team, num_op)
    actions_manager = DiscreteActions1TeammateV1()

    # Q Agent:
    agent = QLearningAgent(num_states=features_manager.get_num_states(),
                           num_actions=actions_manager.get_num_actions(),
                           learning_rate=0.1,
                           discount_factor=0.9,
                           epsilon=0.6,
                           final_epsilon=0.1)
    agent.load_q_table(model_file)
    # save original q_table
    save_model(q_table=agent.q_table,
               file_name="original_model",
               directory=save_dir)

def do_nothing(game_interface: HFOAttackingPlayer,
               features: DiscreteFeatures1TeammateV1):
    action = (NOOP, )
    status, observation = game_interface.step(action, features.has_ball())
    return status, observation
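Putting the pieces together, a minimal retraining driver sketch built from the functions above. It assumes the set-up from Example #13; the iteration split, the 50 test episodes, and the file names are illustrative placeholders, not values from the original project:

# Hypothetical driver loop; block sizes and file names are placeholders:
num_iterations = 10
episodes_per_iteration = num_episodes // num_iterations
for it in range(num_iterations):
    train(num_train_episodes=episodes_per_iteration,
          num_total_train_ep=num_episodes,
          game_interface=hfo_interface,
          features=features_manager,
          agent=agent,
          actions=actions_manager,
          reward_funct=reward_function)
    win_rate = test(num_episodes=50,
                    game_interface=hfo_interface,
                    features=features_manager,
                    agent=agent,
                    actions=actions_manager,
                    reward_funct=reward_function)
    save_model(q_table=agent.q_table,
               file_name="model_iter_{}".format(it),
               directory=save_dir)
    print("Iteration {}: win rate = {}".format(it, win_rate))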