def move_agent(self, action_name, game_interface: "HFOAttackingPlayer",
               features: "BaseHighLevelState"):
    """Dribble toward the field edge named in *action_name*.

    The action name must contain one of UP/DOWN/LEFT/RIGHT; the agent
    dribbles toward that border (the other coordinate stays fixed) for at
    most ``self.action_num_episodes`` game steps or until the game ends.
    Returns (game status, latest observation array).

    Raises ValueError if no direction token is present.
    """
    cur_x = features.agent.x_pos
    cur_y = features.agent.y_pos
    # Same token precedence as the original if/elif chain:
    directions = (("UP", (cur_x, -0.9)), ("DOWN", (cur_x, 0.9)),
                  ("LEFT", (-0.8, cur_y)), ("RIGHT", (0.8, cur_y)))
    for token, (target_x, target_y) in directions:
        if token in action_name:
            hfo_action = (DRIBBLE_TO, target_x, target_y)
            break
    else:
        raise ValueError("ACTION NAME is WRONG")
    steps_taken = 0
    while game_interface.in_game() and steps_taken < self.action_num_episodes:
        status, observation = game_interface.step(hfo_action,
                                                  features.has_ball())
        features.update_features(observation)
        steps_taken += 1
    return game_interface.get_game_status(), \
        game_interface.get_observation_array()
def execute_action(action_params: tuple, repetitions: int,
                   game_interface: "HFOAttackingPlayer", has_ball: bool):
    """Send the same HFO action to the server up to *repetitions* times,
    stopping early if the game ends.

    Returns (game status, last observation received — [] if no step ran).
    """
    observation = []
    remaining = repetitions
    # in_game() is checked first each pass, exactly as before:
    while game_interface.in_game() and remaining > 0:
        status, observation = game_interface.step(action_params, has_ball)
        remaining -= 1
    return game_interface.get_game_status(), observation
def shoot_ball(self, game_interface: "HFOAttackingPlayer",
               features: "DiscreteFeatures1Teammate"):
    """Try to SHOOT while in possession; on the fourth attempt fall back
    to kicking at a random point on the goal line, then stop.

    Returns (game status, latest observation array).
    """
    num_tries = 0
    # Loop ends after the fall-back kick (4 attempts total), or earlier
    # if the ball is lost or the game finishes.
    while (game_interface.in_game() and features.has_ball()
           and num_tries <= 3):
        if num_tries == 3:
            # Shooting failed three times -> blast it at the goal mouth.
            target_y = random.choice([0.17, 0, -0.17])
            hfo_action = (KICK_TO, 0.9, target_y, 2)
        else:
            hfo_action = (SHOOT,)
        _, new_obs = game_interface.step(hfo_action, features.has_ball())
        features.update_features(new_obs)
        num_tries += 1
    return game_interface.get_game_status(), \
        game_interface.get_observation_array()
def pass_ball(self, game_interface: "HFOAttackingPlayer",
              features: "DiscreteFeatures1Teammate"):
    """Try the PASS action toward teammate 11; after two failed passes
    fall back to kicking toward the goal, then stop.

    Returns (game status, latest observation array).
    """
    num_tries = 0
    # Loop ends after the fall-back kick (3 attempts total), or earlier
    # if the ball is lost or the game finishes.
    while (game_interface.in_game() and features.has_ball()
           and num_tries <= 2):
        if num_tries == 2:
            # Passing failed twice -> kick toward the goal instead.
            target_y = random.choice([0.17, 0, -0.17])
            hfo_action = (KICK_TO, 0.9, target_y, 2)
        else:
            hfo_action = (PASS, 11)
        _, new_obs = game_interface.step(hfo_action, features.has_ball())
        features.update_features(new_obs)
        num_tries += 1
    return game_interface.get_game_status(), \
        game_interface.get_observation_array()
def shoot_ball(self, game_interface: "HFOAttackingPlayer",
               features: "BaseHighLevelState"):
    """Kick toward the candidate goal coordinate with the widest open
    angle past the goalie, then refresh the features.

    Returns (game status, latest observation array).
    """
    goalie_pos = np.array([features.opponents[0].x_pos,
                           features.opponents[0].y_pos])
    agent_pos = np.array(features.get_pos_tuple())
    # Open shooting angle for every candidate goal coordinate:
    open_angles = [get_angle(goalie=goalie_pos, player=agent_pos,
                             point=coord)
                   for coord in self.shoot_possible_coord]
    best_idx = int(np.argmax(np.array(open_angles)))
    target = self.shoot_possible_coord[best_idx]
    # Kick with power 2.5 toward the widest-angle coordinate:
    hfo_action = (KICK_TO, target[0], target[1], 2.5)
    _, new_obs = game_interface.step(hfo_action, features.has_ball())
    features.update_features(new_obs)
    return game_interface.get_game_status(), \
        game_interface.get_observation_array()
def move_agent(self, action_name, game_interface: "HFOAttackingPlayer",
               features: "DiscFeatures1Teammate"):
    """Move or dribble one burst in the direction named in *action_name*.

    The name must contain SHORT (10 steps) or LONG (20 steps), MOVE or
    DRIBBLE, and one of UP/DOWN/LEFT/RIGHT. Returns (game status,
    latest observation array).

    Raises ValueError if any of the three tokens is missing.
    """
    # Burst length:
    if "SHORT" in action_name:
        num_repetitions = 10
    elif "LONG" in action_name:
        num_repetitions = 20
    else:
        raise ValueError("ACTION NAME is WRONG")
    # Movement primitive (MOVE checked before DRIBBLE, as before):
    if "MOVE" in action_name:
        base_action = MOVE_TO
    elif "DRIBBLE" in action_name:
        base_action = DRIBBLE_TO
    else:
        raise ValueError("ACTION NAME is WRONG")
    # Target coordinate, keeping the original UP/DOWN/LEFT/RIGHT precedence:
    cur_x, cur_y = features.agent_coord[0], features.agent_coord[1]
    directions = (("UP", (cur_x, -0.9)), ("DOWN", (cur_x, 0.9)),
                  ("LEFT", (-0.8, cur_y)), ("RIGHT", (0.8, cur_y)))
    for token, (target_x, target_y) in directions:
        if token in action_name:
            hfo_action = (base_action, target_x, target_y)
            break
    else:
        raise ValueError("ACTION NAME is WRONG")
    reps_done = 0
    while game_interface.in_game() and reps_done < num_repetitions:
        status, observation = game_interface.step(hfo_action,
                                                  features.has_ball())
        features.update_features(observation)
        reps_done += 1
    return game_interface.get_game_status(), \
        game_interface.get_observation_array()
def train(num_train_episodes: int, num_total_train_ep: int,
          game_interface: HFOAttackingPlayer,
          features: DiscreteFeatures1TeammateV1,
          agent: QLearningAgent,
          actions: DiscreteActions1TeammateV1,
          reward_funct):
    """Train the Q-learning agent for ``num_train_episodes`` episodes.

    @param num_train_episodes: number of episodes to train in this iteration
    @param num_total_train_ep: total number of training episodes (used to
        anneal the agent's hyper-parameters);
    @param game_interface: game interface, that manages interactions
        between both;
    @param features: features interface, from the observation array, gets
        the main features for the agent;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used (maps game status -> reward)
    @return: None — average reward and exploration percentage are printed.
        NOTE(review): the original docstring claimed the agent was
        returned, but this function returns nothing.
    @raise ServerDownError: if the HFO server died between episodes.
    @raise NoActionPlayedError: if an episode hit OUT_OF_TIME after fewer
        than 5 agent decisions.
    """
    sum_score = 0
    # NOTE(review): sum_epsilons is accumulated below but never reported.
    sum_epsilons = 0
    # Reset exploration/exploitation counters for this iteration's stats:
    agent.counter_explorations = 0
    agent.counter_exploitations = 0
    for ep in range(num_train_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            raise ServerDownError("training; episode={}".format(ep))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features, actions=actions)
        # Start learning loop
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            # print("Agent playing {} for {}".format(action_name, num_rep))
            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)
            # Update environment features:
            reward = reward_funct(status)
            sum_score += reward
            new_state_id = features.get_state_index()
            # Store the transition; learning happens at episode end from
            # the buffer (agent.learn_buffer below):
            agent.store_ep(state_idx=curr_state_id, action_idx=action_idx,
                           reward=reward, next_state_idx=new_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
            # Sanity check: an OUT_OF_TIME episode with very few decisions
            # means the agent never actually played.
            if game_interface.get_game_status() == OUT_OF_TIME:
                if debug_counter < 5:
                    raise NoActionPlayedError(
                        "agent was only able to choose {}".format(
                            debug_counter))
        # Learn from the episode buffer and anneal hyper-parameters:
        agent.learn_buffer()
        agent.update_hyper_parameters(num_total_episodes=num_total_train_ep)
        sum_epsilons += agent.epsilon
        # Game Reset
        game_interface.reset()
    print("<<TRAIN>> AVR reward = ", sum_score / num_train_episodes)
    # The ratio is rounded to 4 places *before* scaling to a percentage:
    print("<<TRAIN>> %Explorations={}% ".format(
        round(
            (agent.counter_explorations /
             (agent.counter_exploitations + agent.counter_explorations)),
            4) * 100))
def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteFeatures1TeammateV1,
         agent: QLearningAgent,
         actions: DiscreteActions1TeammateV1,
         reward_funct) -> float:
    """Evaluate the agent (no learning updates) and measure its win rate.

    @param num_episodes: number of episodes to run
    @param game_interface: game interface, that manages interactions
        between both;
    @param features: features interface, from the observation array, gets
        the main features for the agent;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used (maps game status -> reward)
    @return: (float) the win rate — goals scored divided by num_episodes
    @raise NoActionPlayedError: if an episode hit OUT_OF_TIME after fewer
        than 5 agent decisions.
    """
    # Run training using Q-Learning
    num_goals = 0
    # Cycle through every start position so evaluation covers all of them:
    start_pos_list = list(ORIGIN_POSITIONS.keys())
    for ep in range(num_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            # Best-effort: stop evaluating instead of raising.
            print("Server is down while testing; episode={}".format(ep))
            break
        # Go to origin position:
        features.update_features(game_interface.get_state())
        # Set game start condition:
        if not start_pos_list:
            # All start positions used; refill and begin another round.
            start_pos_list = list(ORIGIN_POSITIONS.keys())
        start_pos = start_pos_list.pop()
        go_to_origin_position(game_interface=game_interface,
                              pos_name=start_pos, features=features,
                              actions=actions)
        # Test loop:
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            print("Agent playing {}".format(action_name))
            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)
            # update features:
            reward = reward_funct(status)
            # A reward of exactly 1 marks a goal:
            num_goals += 1 if reward == 1 else 0
            # Sanity check: an OUT_OF_TIME episode with very few decisions
            # means the agent never actually played.
            if game_interface.get_game_status() == OUT_OF_TIME:
                if debug_counter < 5:
                    raise NoActionPlayedError(
                        "agent was only able to choose {}".format(
                            debug_counter))
        # Game Reset
        game_interface.reset()
    print("<<TEST>> NUM Goals = ", num_goals)
    print("<<TEST>> NUM episodes = ", (ep + 1))
    # NOTE(review): the printed rate divides by episodes actually played
    # (ep + 1) while the returned value divides by num_episodes; these
    # differ when the server goes down early — confirm which is intended.
    # Also note: `ep` is unbound if num_episodes == 0.
    print("<<TEST>> AVR win rate = ", num_goals / (ep + 1))
    return num_goals / num_episodes