def test(train_ep: int, num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteHighLevelFeatures, agent: QLearningAgent,
         actions: ActionManager):
    # Run test episodes using the greedy Q-Learning policy
    score = 0
    agent.test_episodes.append(train_ep)
    for ep in range(num_episodes):
        print('<Test> {}/{}:'.format(ep, num_episodes))
        while game_interface.in_game():
            # Update environment features:
            observation = game_interface.get_state()
            curr_state_id = features.get_state_index(observation)
            has_ball = features.has_ball(observation)
            # Act:
            action_idx = agent.exploit_actions(curr_state_id)
            hfo_action = actions.map_action(action_idx)
            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            # Save Metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward_function(status)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        score += 1 if game_interface.status == GOAL else 0
        # Reset player:
        agent.reset(training=False)
        # Game Reset
        game_interface.reset()
    agent.scores.append(score)
    actions_name = [actions.map_action_to_str(i)
                    for i in range(agent.num_actions)]
    agent.export_metrics(training=False, actions_name=actions_name)
def __init__(self, num_opponents: int, num_teammates: int, port: int = 6000,
             online: bool = True):
    # Game Interface:
    self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                             num_teammates=num_teammates,
                                             port=port)
    if online:
        self.game_interface.connect_to_server()
    # Features Interface:
    self.features = PlasticFeatures(num_op=num_opponents,
                                    num_team=num_teammates)
    # Actions Interface:
    self.actions = Actions()
    # Agent instance:
    self.agent = DQNAgent(num_features=self.features.num_features,
                          num_actions=self.actions.get_num_actions(),
                          learning_rate=0.005, discount_factor=0.99,
                          epsilon=1, final_epsilon=0.001,
                          epsilon_decay=0.99995, tau=0.125)
def move_agent(self, action_name, game_interface: HFOAttackingPlayer,
               features: BaseHighLevelState):
    """ Agent Moves/Dribbles in a specific direction """
    # Get Movement type:
    action = DRIBBLE_TO
    if "UP" in action_name:
        action = (action, features.agent.x_pos, -0.9)
    elif "DOWN" in action_name:
        action = (action, features.agent.x_pos, 0.9)
    elif "LEFT" in action_name:
        action = (action, -0.8, features.agent.y_pos)
    elif "RIGHT" in action_name:
        action = (action, 0.8, features.agent.y_pos)
    else:
        raise ValueError("ACTION NAME is WRONG")
    attempts = 0
    while game_interface.in_game() and attempts < self.action_num_episodes:
        status, observation = game_interface.step(action, features.has_ball())
        features.update_features(observation)
        attempts += 1
    return game_interface.get_game_status(), \
        game_interface.get_observation_array()
def move_agent(action_name, game_interface: HFOAttackingPlayer,
               features: DiscreteFeatures1TeammateV1):
    # print("move_agent!")
    # Number of repetitions:
    if "SHORT" in action_name:
        num_repetitions = 10
    elif "LONG" in action_name:
        num_repetitions = 20
    else:
        raise ValueError("ACTION NAME is WRONG")
    # Get Movement type:
    if "MOVE" in action_name:
        action = MOVE_TO
    elif "DRIBBLE" in action_name:
        action = DRIBBLE_TO
    else:
        raise ValueError("ACTION NAME is WRONG")
    if "UP" in action_name:
        action = (action, features.agent_coord[0], -0.9)
    elif "DOWN" in action_name:
        action = (action, features.agent_coord[0], 0.9)
    elif "LEFT" in action_name:
        action = (action, -0.8, features.agent_coord[1])
    elif "RIGHT" in action_name:
        action = (action, 0.8, features.agent_coord[1])
    else:
        raise ValueError("ACTION NAME is WRONG")
    # Initialize the return values, so they are bound even if the game has
    # already ended and the loop below never runs:
    status = game_interface.get_game_status()
    observation = game_interface.get_observation_array()
    attempts = 0
    while game_interface.in_game() and attempts < num_repetitions:
        status, observation = game_interface.step(action, features.has_ball())
        features.update_features(observation)
        attempts += 1
    return status, observation
def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteFeatures, agent: QLearningAgentTest,
         actions: DiscreteActionsV5, reward_funct) -> float:
    """
    @param num_episodes: number of episodes to run
    @param game_interface: game interface that manages the interactions
    between the agent and the HFO server;
    @param features: features interface; extracts the main features for the
    agent from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (float) the average reward
    """
    # Run test episodes using the greedy Q-Learning policy
    sum_score = 0
    for ep in range(num_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            raise ServerDownError("testing; episode={}".format(ep))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features, actions=actions)
        # Test loop:
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            if not has_ball:
                hfo_action_params = GO_TO_BALL
                num_rep = 5
            else:
                # Act:
                debug_counter += 1
                action_idx = agent.exploit_actions(curr_state_id)
                hfo_action_params, num_rep = \
                    actions.map_action_idx_to_hfo_action(
                        agent_pos=features.get_pos_tuple(),
                        has_ball=has_ball,
                        action_idx=action_idx)
            # Step:
            status, observation = execute_action(
                action_params=hfo_action_params,
                repetitions=num_rep,
                has_ball=has_ball,
                game_interface=game_interface)
            # Update features:
            reward = reward_funct(status)
            features.update_features(observation)
            sum_score += reward
        # Game Reset
        game_interface.reset()
    print("## AVR Test reward = ", sum_score / num_episodes)
    return sum_score / num_episodes
def execute_action(action_params: tuple, repetitions: int,
                   game_interface: HFOAttackingPlayer, has_ball: bool):
    rep_counter_aux = 0
    observation = []
    while game_interface.in_game() and rep_counter_aux < repetitions:
        _, observation = game_interface.step(action_params, has_ball)
        rep_counter_aux += 1
    return game_interface.get_game_status(), observation
def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
         actions: DiscreteActions1TeammateV1, reward_funct) -> float:
    """
    @param num_episodes: number of episodes to run
    @param game_interface: game interface that manages the interactions
    between the agent and the HFO server;
    @param features: features interface; extracts the main features for the
    agent from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (float) the win rate
    """
    # Run test episodes using the greedy Q-Learning policy
    num_goals = 0
    for ep in range(num_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            print("Server is down while testing; episode={}".format(ep))
            break
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features, actions=actions)
        # Test loop:
        debug_counter = 0  # TODO remove
        status = game_interface.get_game_status()
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            print("Agent playing {}".format(action_name))
            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)
            # Update features:
            reward = reward_funct(status)
            num_goals += 1 if reward == 1 else 0
        if status == OUT_OF_TIME:
            if debug_counter < 5:
                raise NoActionPlayedError(
                    "agent was only able to choose {}".format(debug_counter))
        # Game Reset
        game_interface.reset()
    print("<<TEST>> NUM Goals = ", num_goals)
    print("<<TEST>> NUM episodes = ", (ep + 1))
    print("<<TEST>> AVR win rate = ", num_goals / (ep + 1))
    return num_goals / num_episodes
def train(num_episodes: int, game_interface: HFOAttackingPlayer,
          features: discrete_features_v2.DiscreteFeaturesV2,
          agent: QLearningAgent, actions: DiscreteActions, reward_funct):
    for ep in range(num_episodes):
        print('<Training> Episode {}/{}:'.format(ep, num_episodes))
        aux_positions_names = set()
        aux_actions_played = set()
        while game_interface.in_game():
            # Update environment features:
            features.update_features(game_interface.get_state())
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            action_idx = agent.act(curr_state_id)
            aux_actions_played.add(actions.map_action_to_str(action_idx))
            hfo_action: tuple = actions.map_action_idx_to_hfo_action(
                features.get_pos_tuple(), action_idx)
            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            reward = reward_funct(status)
            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward
            aux_positions_names.add(features.get_position_name())
            # Update environment features:
            prev_state_id = curr_state_id
            features.update_features(observation)
            curr_state_id = features.get_state_index()
            # Update agent
            agent.learn(prev_state_id, action_idx, reward, status,
                        curr_state_id)
        print(':: Episode: {}; reward: {}; positions: {}; actions: {}'.format(
            ep, agent.cum_reward, aux_positions_names, aux_actions_played))
        agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
        agent.update_hyper_parameters()
        # Game Reset
        game_interface.reset()
    agent.save_model()
    actions_name = [actions.map_action_to_str(i)
                    for i in range(agent.num_actions)]
    agent.export_metrics(training=True, actions_name=actions_name)
def go_to_origin_position(game_interface: HFOAttackingPlayer,
                          features: DiscreteFeatures1TeammateV1,
                          actions: DiscreteActions1TeammateV1,
                          pos_name: str = None):
    if pos_name:
        origin_pos = ORIGIN_POSITIONS[pos_name]
    else:
        pos_name, origin_pos = random.choice(list(ORIGIN_POSITIONS.items()))
    # print("\nMoving to starting point: {0}".format(pos_name))
    pos = features.get_pos_tuple(round_ndigits=1)
    while origin_pos != pos:
        has_ball = features.has_ball()
        hfo_action: tuple = actions.dribble_to_pos(origin_pos)
        status, observation = game_interface.step(hfo_action, has_ball)
        features.update_features(observation)
        pos = features.get_pos_tuple(round_ndigits=1)
    # Inform the teammate that the agent is ready to start the game:
    teammate_last_coord = features.teammate_coord.copy()
    counter = 0
    while teammate_last_coord.tolist() == features.teammate_coord.tolist():
        if counter >= 10:
            # print("STOP repeating the message")
            break
        game_interface.hfo.say(settings.PLAYER_READY_MSG)
        game_interface.hfo.step()
        observation = game_interface.hfo.getState()
        features.update_features(observation)
        # print("Action said READY!")
        counter += 1
def __init__(self, num_opponents: int, num_teammates: int, port: int = 6000):
    # Game Interface:
    self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                             num_teammates=num_teammates,
                                             port=port)
    self.game_interface.connect_to_server()
    # Features Interface:
    self.features = DiscFeatures1Teammate(num_op=num_opponents,
                                          num_team=num_teammates)
    # Actions Interface:
    self.actions = Actions()
    # Agent instance:
    self.agent = QAgent(num_features=self.features.num_features,
                        num_actions=self.actions.get_num_actions(),
                        learning_rate=0.1, discount_factor=0.9, epsilon=0.8)
def kick_to_pos(self, pos: tuple, features: BaseHighLevelState,
                game_interface: HFOAttackingPlayer):
    """ The agent kicks to the expected position """
    hfo_action = (KICK_TO, pos[0], pos[1], 2)
    _, observation = game_interface.step(hfo_action, features.has_ball())
    # Update features:
    features.update_features(observation)
def pass_ball(game_interface: HFOAttackingPlayer,
              features: DiscreteFeatures1TeammateV1):
    # print("pass_ball!")
    # Initialize the return values, so they are bound even if the loop
    # below never runs:
    status = game_interface.get_game_status()
    observation = game_interface.get_observation_array()
    attempts = 0
    while game_interface.in_game() and features.has_ball():
        if attempts > 2:
            break
        elif attempts == 2:
            # Failed to pass 2 times
            print("Failed to PASS two times. WILL KICK")
            y = random.choice([0.17, 0, -0.17])
            hfo_action = (KICK_TO, 0.9, y, 2)
        else:
            hfo_action = (PASS, 11)
        status, observation = game_interface.step(hfo_action,
                                                  features.has_ball())
        features.update_features(observation)
        attempts += 1
    return status, observation
def shoot_ball(game_interface: HFOAttackingPlayer,
               features: DiscreteFeatures1TeammateV1):
    # print("shoot_ball!")
    # Initialize the return values, so they are bound even if the loop
    # below never runs:
    status = game_interface.get_game_status()
    observation = game_interface.get_observation_array()
    attempts = 0
    while game_interface.in_game() and features.has_ball():
        if attempts > 3:
            break
        elif attempts == 3:
            # Failed to shoot 3 times
            print("Failed to SHOOT 3 times. WILL KICK")
            y = random.choice([0.17, 0, -0.17])
            hfo_action = (KICK_TO, 0.9, y, 2)
        else:
            hfo_action = (SHOOT,)
        status, observation = game_interface.step(hfo_action,
                                                  features.has_ball())
        features.update_features(observation)
        attempts += 1
    return status, observation
def shoot_ball(self, game_interface: HFOAttackingPlayer,
               features: DiscreteFeatures1Teammate):
    """ Tries to shoot; if it fails, kicks towards the goal randomly """
    attempts = 0
    while game_interface.in_game() and features.has_ball():
        if attempts > 3:
            break
        elif attempts == 3:
            # Failed to shoot 3 times
            # print("Failed to SHOOT 3 times. WILL KICK")
            y = random.choice([0.17, 0, -0.17])
            hfo_action = (KICK_TO, 0.9, y, 2)
        else:
            hfo_action = (SHOOT,)
        _, obs = game_interface.step(hfo_action, features.has_ball())
        features.update_features(obs)
        attempts += 1
    return game_interface.get_game_status(), \
        game_interface.get_observation_array()
def train(num_episodes: int, game_interface: HFOAttackingPlayer,
          features: DiscreteHighLevelFeatures, agent: QLearningAgent,
          actions: ActionManager):
    for ep in range(num_episodes):
        print('<Training> Episode {}/{}:'.format(ep, num_episodes))
        while game_interface.in_game():
            # Update environment features:
            observation = game_interface.get_state()
            curr_state_id = features.get_state_index(observation)
            has_ball = features.has_ball(observation)
            # Act:
            action_idx = agent.act(curr_state_id)
            hfo_action = actions.map_action(action_idx)
            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            reward = reward_function(status)
            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward
            # Update environment features:
            prev_state_id = curr_state_id
            curr_state_id = features.get_state_index(observation)
            # Update agent
            agent.learn(prev_state_id, action_idx, reward, status,
                        curr_state_id)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
        agent.update_hyper_parameters()
        # Game Reset
        game_interface.reset()
    agent.save_model()
    actions_name = [actions.map_action_to_str(i)
                    for i in range(agent.num_actions)]
    agent.export_metrics(training=True, actions_name=actions_name)
def pass_ball(self, game_interface: HFOAttackingPlayer,
              features: DiscreteFeatures1Teammate):
    """ Tries to use the PASS action; if it fails, kicks in the direction
    of the teammate """
    attempts = 0
    while game_interface.in_game() and features.has_ball():
        if attempts > 2:
            break
        elif attempts == 2:
            # Failed to pass 2 times
            # print("Failed to PASS two times. WILL KICK")
            y = random.choice([0.17, 0, -0.17])
            hfo_action = (KICK_TO, 0.9, y, 2)
        else:
            hfo_action = (PASS, 11)
        _, obs = game_interface.step(hfo_action, features.has_ball())
        features.update_features(obs)
        attempts += 1
    return game_interface.get_game_status(), \
        game_interface.get_observation_array()
def dribble_to_pos(self, pos: tuple, features: DiscreteFeatures1Teammate,
                   game_interface: HFOAttackingPlayer):
    """ The agent keeps dribbling until it reaches the expected position """
    curr_pos = features.get_pos_tuple(round_ndigits=1)
    while pos != curr_pos:
        hfo_action = (DRIBBLE_TO, pos[0], pos[1])
        _, observation = game_interface.step(hfo_action, features.has_ball())
        # Update features:
        features.update_features(observation)
        curr_pos = features.get_pos_tuple(round_ndigits=1)
def move_to_pos(self, pos: tuple, features: BaseHighLevelState,
                game_interface: HFOAttackingPlayer):
    """ The agent keeps moving until it reaches the expected position """
    curr_pos = features.get_pos_tuple(round_ndigits=1)
    while pos != curr_pos:
        hfo_action = (MOVE_TO, pos[0], pos[1])
        _, observation = game_interface.step(hfo_action, features.has_ball())
        # Update features:
        features.update_features(observation)
        curr_pos = features.get_pos_tuple(round_ndigits=1)
def shoot_ball(self, game_interface: HFOAttackingPlayer,
               features: BaseHighLevelState):
    """ Kicks towards the goal coordinate with the widest open angle
    relative to the goalie """
    # Get best shoot angle:
    angles = []
    goalie_coord = np.array([features.opponents[0].x_pos,
                             features.opponents[0].y_pos])
    player_coord = np.array(features.get_pos_tuple())
    for goal_pos in self.shoot_possible_coord:
        angles.append(get_angle(goalie=goalie_coord, player=player_coord,
                                point=goal_pos))
    idx = int(np.argmax(np.array(angles)))
    best_shoot_coord = self.shoot_possible_coord[idx]
    # Action parameters:
    hfo_action = (KICK_TO, best_shoot_coord[0], best_shoot_coord[1], 2.5)
    # Step game:
    _, obs = game_interface.step(hfo_action, features.has_ball())
    # Update features:
    features.update_features(obs)
    return game_interface.get_game_status(), \
        game_interface.get_observation_array()
def test(train_ep: int, num_episodes: int, game_interface: HFOAttackingPlayer,
         features: discrete_features_v2.DiscreteFeaturesV2,
         agent: QLearningAgentV4, actions: DiscreteActionsV2, reward_funct):
    # Run test episodes using the greedy Q-Learning policy
    score = 0
    agent.test_episodes.append(train_ep)
    for ep in range(num_episodes):
        print('<Test> {}/{}:'.format(ep, num_episodes))
        prev_state_id = -1
        while game_interface.in_game():
            # Update environment features:
            features.update_features(game_interface.get_state())
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            if prev_state_id != curr_state_id:
                print([round(val, 2) for val in agent.q_table[curr_state_id]])
            action_idx = agent.exploit_actions(curr_state_id)
            hfo_action: tuple = actions.map_action_idx_to_hfo_action(
                agent_pos=features.get_pos_tuple(), has_ball=has_ball,
                action_idx=action_idx)
            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            prev_state_id = curr_state_id
            # Save Metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward_funct(status)
        print(':: Episode: {}; reward: {}'.format(ep, agent.cum_reward))
        score += 1 if game_interface.status == GOAL else 0
        # Reset player:
        agent.reset(training=False)
        # Game Reset
        game_interface.reset()
    agent.scores.append(score)
    actions_name = [actions.map_action_to_str(i, has_ball=True)
                    for i in range(agent.num_actions)]
    agent.export_metrics(training=False, actions_name=actions_name)
def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: discrete_features_v2.DiscreteFeaturesV2,
         agent: QLearningAgentV5, actions: DiscreteActionsV5, reward_funct):
    """
    @param num_episodes: number of episodes to run
    @param game_interface: game interface that manages the interactions
    between the agent and the HFO server;
    @param features: features interface; extracts the main features for the
    agent from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    @return: (float) the average reward
    """
    # Run test episodes using the greedy Q-Learning policy
    sum_score = 0
    for ep in range(num_episodes):
        print('<Test> {}/{}:'.format(ep, num_episodes))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features, actions=actions)
        # Test loop:
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            action_idx = agent.exploit_actions(curr_state_id)
            hfo_action_params, num_rep = \
                actions.map_action_idx_to_hfo_action(
                    agent_pos=features.get_pos_tuple(), has_ball=has_ball,
                    action_idx=action_idx)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            # Step:
            rep_counter_aux = 0
            while game_interface.in_game() and rep_counter_aux < num_rep:
                status, observation = game_interface.step(hfo_action_params,
                                                          has_ball)
                rep_counter_aux += 1
            reward = reward_funct(status)
            # Update features:
            features.update_features(observation)
            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            sum_score += reward
        # Reset player:
        agent.reset(training=False)
        # Game Reset
        game_interface.reset()
    return sum_score / num_episodes
def go_to_origin_position(game_interface: HFOAttackingPlayer,
                          features: DiscreteFeatures,
                          actions: DiscreteActionsV5,
                          random_start: bool = True):
    if random_start:
        pos_name, origin_pos = random.choice(list(ORIGIN_POSITIONS.items()))
    else:
        pos_name = "Fixed start"
        origin_pos = features.get_pos_tuple()
    print("Moving to starting point: {0}".format(pos_name))
    pos = features.get_pos_tuple(round_ndigits=1)
    while origin_pos != pos:
        has_ball = features.has_ball()
        hfo_action: tuple = actions.dribble_to_pos(origin_pos)
        status, observation = game_interface.step(hfo_action, has_ball)
        features.update_features(observation)
        pos = features.get_pos_tuple(round_ndigits=1)
def dribble_to_pos(self, pos: tuple, features: BaseHighLevelState,
                   game_interface: HFOAttackingPlayer):
    """ The agent keeps dribbling until it reaches the expected position """
    def check_valid_pos(pos_tuple: tuple):
        # Positions may have at most 1 decimal digit, so they can be
        # matched against get_pos_tuple(round_ndigits=1) below:
        for pos_aux in pos_tuple:
            try:
                num_digits = len(str(pos_aux).split(".")[1])
                if num_digits >= 2:
                    return False
            except IndexError:
                pass
        return True

    if check_valid_pos(pos) is False:
        raise Exception("Initial position invalid. Position coordinates "
                        "should be floats with 1 decimal digit or less")
    curr_pos = features.get_pos_tuple(round_ndigits=1)
    while pos != curr_pos:
        hfo_action = (DRIBBLE_TO, pos[0], pos[1])
        _, observation = game_interface.step(hfo_action, features.has_ball())
        # Update features:
        features.update_features(observation)
        curr_pos = features.get_pos_tuple(round_ndigits=1)
def do_nothing(self, game_interface: HFOAttackingPlayer,
               features: DiscreteFeatures1Teammate):
    action = (NOOP,)
    status, observation = game_interface.step(action, features.has_ball())
    return status, observation
class Player:
    def __init__(self, num_opponents: int, num_teammates: int):
        # Game Interface:
        self.game_interface = HFOAttackingPlayer(num_opponents=num_opponents,
                                                 num_teammates=num_teammates)
        self.game_interface.connect_to_server()
        # Features Interface:
        self.features = DiscreteFeatures1Teammate(num_op=num_opponents,
                                                  num_team=num_teammates)
        # Actions Interface:
        self.actions = DiscreteActionsModule()
        # Agent instance:
        self.agent = QAgent(num_features=self.features.num_features,
                            num_actions=self.actions.get_num_actions(),
                            learning_rate=0.1, discount_factor=0.9,
                            epsilon=1, final_epsilon=0.3)

    def get_reward(self, status: int) -> int:
        return basic_reward(status)

    def set_starting_game_conditions(self,
                                     game_interface: HFOAttackingPlayer,
                                     features: DiscreteFeatures1Teammate,
                                     start_with_ball: bool = True,
                                     start_pos: tuple = None):
        """ Sets the starting game conditions, e.g. moves the agent to its
        initial position """
        if not start_pos:
            pos_name, start_pos = random.choice(
                list(STARTING_POSITIONS.items()))
        if start_with_ball:
            # Move to starting position:
            self.actions.dribble_to_pos(start_pos, features, game_interface)
        else:
            if self.features.has_ball():
                self.actions.kick_to_pos((0, 0), features, game_interface)
            # Move to starting position:
            self.actions.move_to_pos(start_pos, features, game_interface)
        # Inform the other players that the agent is ready to start:
        game_interface.hfo.say(settings.PLAYER_READY_MSG)

    def train(self, num_train_episodes: int, num_total_train_ep: int,
              start_with_ball: bool = True):
        """
        @param num_train_episodes: number of episodes to train in this
        iteration
        @param num_total_train_ep: total number of episodes to train
        @param start_with_ball: bool
        @raise ServerDownError
        """
        # Metrics variables:
        _num_wins = 0
        _sum_epsilons = 0
        for ep in range(num_train_episodes):
            # Check if server still running:
            self.game_interface.check_server_is_up()
            # Update features:
            self.features.update_features(self.game_interface.get_state())
            # Go to origin position:
            self.set_starting_game_conditions(
                game_interface=self.game_interface, features=self.features,
                start_with_ball=start_with_ball)
            # Start learning loop
            goal = False  # bool flag
            while self.game_interface.in_game():
                # Update environment features:
                features_array = self.features.get_features().copy()
                # Act:
                action_idx = self.agent.act(features_array)
                status = self.actions.execute_action(
                    action_idx=action_idx, features=self.features,
                    game_interface=self.game_interface)
                # Every step we update replay memory and train main network
                done = not self.game_interface.in_game()
                goal = self.game_interface.scored_goal()
                self.agent.store_transition(
                    curr_st=features_array, action_idx=action_idx,
                    reward=self.get_reward(status),
                    new_st=self.features.get_features(), done=done)
                # Train
                self.agent.train(goal)
            # Update auxiliary variables:
            _sum_epsilons += self.agent.epsilon
            _num_wins += 1 if self.game_interface.scored_goal() else 0
            # Update Agent:
            self.agent.restart(num_total_train_ep)
            # Game Reset
            self.game_interface.reset()
        print("[TRAIN: Summary] WIN rate = {}; AVR epsilon = {}".format(
            _num_wins / num_train_episodes,
            _sum_epsilons / num_train_episodes))

    def test(self, num_episodes: int, start_with_ball: bool = True,
             training: bool = False) -> float:
        """
        @param num_episodes: number of episodes to run
        @param start_with_ball: flag
        @param training: flag
        @return: (float) the win rate
        """
        starting_pos_list = list(STARTING_POSITIONS.values())
        # Metrics variables:
        _num_wins = 0
        for ep in range(num_episodes):
            # Check if server still running:
            self.game_interface.check_server_is_up()
            # Update features:
            self.features.update_features(self.game_interface.get_state())
            # Set up gaming conditions:
            self.set_starting_game_conditions(
                game_interface=self.game_interface, features=self.features,
                start_pos=starting_pos_list[ep % len(starting_pos_list)],
                start_with_ball=start_with_ball)
            # Start testing loop
            prev_action_idx = None
            while self.game_interface.in_game():
                # Update environment features:
                features_array = self.features.get_features().copy()
                # Act:
                action_idx = self.agent.exploit_actions(features_array)
                if prev_action_idx != action_idx and not training:
                    print("ACTION:: {}".format(
                        self.actions.map_action_to_str(
                            action_idx, self.features.has_ball())))
                prev_action_idx = action_idx
                self.actions.execute_action(
                    action_idx=action_idx, features=self.features,
                    game_interface=self.game_interface)
            # Update auxiliary variables:
            _num_wins += 1 if self.game_interface.scored_goal() else 0
            # Game Reset
            self.game_interface.reset()
        avr_win_rate = _num_wins / num_episodes
        print("[TEST: Summary] WIN rate = {};".format(avr_win_rate))
        return avr_win_rate
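# Usage sketch for the Player class above (illustrative, not taken from the
# original source): the episode counts and the opponent/teammate numbers are
# assumptions, and the alternating train/test loop mirrors the
# num_train_ep/num_test_ep/num_repetitions split used by the launcher
# fragments elsewhere in this collection.
if __name__ == '__main__':
    player = Player(num_opponents=1, num_teammates=1)
    for repetition in range(5):
        # Train for a block of episodes, then measure the greedy policy:
        player.train(num_train_episodes=100, num_total_train_ep=500,
                     start_with_ball=True)
        win_rate = player.test(num_episodes=20, start_with_ball=True)
        print("Repetition {}: win rate = {}".format(repetition, win_rate))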
def no_ball_action(self, game_interface: HFOAttackingPlayer,
                   features: BaseHighLevelState) -> int:
    action = (MOVE,)
    status, observation = game_interface.step(action, features.has_ball())
    features.update_features(observation)
    return status
def do_nothing(self, game_interface: HFOAttackingPlayer,
               features: BaseHighLevelState):
    action = (NOOP,)
    status, observation = game_interface.step(action, features.has_ball())
    return status, observation
parser.add_argument('--num_episodes', type=int, default=500)
parser.add_argument('--save_file', type=str, default=None)

args = parser.parse_args()

agent_id = args.id
num_team = args.num_teammates
num_op = args.num_opponents
num_episodes = args.num_episodes
saving_file = args.save_file

print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
      "num_episodes={}; saveFile={};".format(agent_id, num_op, num_team,
                                             num_episodes, saving_file))

# Initialize connection with the HFO server
hfo_interface = HFOAttackingPlayer(agent_id=agent_id, num_opponents=num_op,
                                   num_teammates=num_team)
hfo_interface.connect_to_server()

# Reward Function
reward_function = simple_reward

# Get number of features and actions
features_manager = DiscreteHighLevelFeatures(num_team, num_op)
actions_manager = ActionManager([SHOOT, MOVE, DRIBBLE])

# Initialize a Q-Learning Agent
agent = QLearningAgent(num_states=features_manager.get_num_states(),
                       num_actions=actions_manager.get_num_actions(),
                       learning_rate=0.1,
                       discount_factor=0.99,
def train(num_train_episodes: int, num_total_train_ep: int,
          game_interface: HFOAttackingPlayer,
          features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
          actions: DiscreteActions1TeammateV1, reward_funct):
    """
    @param num_train_episodes: number of episodes to train in this iteration
    @param num_total_train_ep: total number of episodes to train
    @param game_interface: game interface that manages the interactions
    between the agent and the HFO server;
    @param features: features interface; extracts the main features for the
    agent from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    """
    sum_score = 0
    sum_epsilons = 0
    agent.counter_explorations = 0
    agent.counter_exploitations = 0
    for ep in range(num_train_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            raise ServerDownError("training; episode={}".format(ep))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features, actions=actions)
        # Start learning loop
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            # print("Agent playing {}".format(action_name))
            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)
            # Update environment features:
            reward = reward_funct(status)
            sum_score += reward
            new_state_id = features.get_state_index()
            agent.store_ep(state_idx=curr_state_id, action_idx=action_idx,
                           reward=reward, next_state_idx=new_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
        if game_interface.get_game_status() == OUT_OF_TIME:
            if debug_counter < 5:
                raise NoActionPlayedError(
                    "agent was only able to choose {}".format(debug_counter))
        agent.learn_buffer()
        agent.update_hyper_parameters(num_total_episodes=num_total_train_ep)
        sum_epsilons += agent.epsilon
        # Game Reset
        game_interface.reset()
    print("<<TRAIN>> AVR reward = ", sum_score / num_train_episodes)
    print("<<TRAIN>> %Explorations={}% ".format(
        round((agent.counter_explorations /
               (agent.counter_exploitations + agent.counter_explorations)),
              4) * 100))
args = parser.parse_args()

num_team = args.num_teammates
num_op = args.num_opponents
num_train_ep = args.num_train_ep
num_test_ep = args.num_test_ep
num_repetitions = args.num_repetitions
num_episodes = (num_train_ep + num_test_ep) * num_repetitions

# Load Model
model_file = args.model_file
# Directory
save_dir = args.save_dir or mkdir(num_episodes, num_op,
                                  extra_note="retrain")

# Initialize connection with the HFO server
hfo_interface = HFOAttackingPlayer(num_opponents=num_op,
                                   num_teammates=num_team)
hfo_interface.connect_to_server()

print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
      "num_episodes={};".format(hfo_interface.hfo.getUnum(), num_op,
                                num_team, num_episodes))

# Agent set-up
reward_function = basic_reward
features_manager = DiscreteFeatures1TeammateV1(num_team, num_op)
actions_manager = DiscreteActions1TeammateV1()

# Q Agent:
agent = QLearningAgent(num_states=features_manager.get_num_states(),
                       num_actions=actions_manager.get_num_actions(),
                       learning_rate=0.1,
                       discount_factor=0.9,