def train(num_episodes: int, game_interface: HFOAttackingPlayer,
          features: discrete_features_v2.DiscreteFeaturesV2,
          agent: QLearningAgentV4, actions: DiscreteActionsV2,
          reward_funct):
    for ep in range(num_episodes):
        # print('<Training> Episode {}/{}:'.format(ep, num_episodes))
        aux_positions_names = set()
        aux_actions_played = set()
        while game_interface.in_game():
            # Update environment features:
            features.update_features(game_interface.get_state())
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            action_idx = agent.act(curr_state_id)
            hfo_action: tuple = actions.map_action_idx_to_hfo_action(
                agent_pos=features.get_pos_tuple(),
                has_ball=has_ball,
                action_idx=action_idx)

            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            reward = reward_funct(status)

            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward
            aux_positions_names.add(features.get_position_name())
            action_name = actions.map_action_to_str(action_idx, has_ball)
            aux_actions_played.add(action_name)

            # Update environment features:
            prev_state_id = curr_state_id
            features.update_features(observation)
            curr_state_id = features.get_state_index()

            agent.store_ep(state_idx=prev_state_id, action_idx=action_idx,
                           reward=reward, next_state_idx=curr_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
        agent.learn()
        # print(':: Episode: {}; reward: {}; epsilon: {}; positions: {}; '
        #       'actions: {}'.format(ep, agent.cum_reward, agent.epsilon,
        #                            aux_positions_names, aux_actions_played))
        agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
        agent.update_hyper_parameters(episode=ep,
                                      num_total_episodes=num_episodes)
        # Game Reset
        game_interface.reset()
    agent.save_model()
    actions_name = [actions.map_action_to_str(i, has_ball=True)
                    for i in range(agent.num_actions)]
    agent.export_metrics(training=True, actions_name=actions_name)
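# ---------------------------------------------------------------------------
# Minimal sketch of a reward_funct that either train() variant accepts. It
# assumes the status codes returned by game_interface.step() are the standard
# game-status constants from the hfo python bindings (hfo.GOAL,
# hfo.CAPTURED_BY_DEFENSE, ...); the reward values are illustrative, not the
# shaping actually used in this project.
# ---------------------------------------------------------------------------
import hfo


def basic_reward(status: int) -> int:
    """Illustrative reward: bonus for scoring, penalty for losing the ball."""
    if status == hfo.GOAL:
        return 1000
    if status in (hfo.CAPTURED_BY_DEFENSE, hfo.OUT_OF_BOUNDS,
                  hfo.OUT_OF_TIME):
        return -1000
    return -1  # small step penalty while the episode is still IN_GAME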
def train(num_train_episodes: int, num_total_train_ep: int,
          game_interface: HFOAttackingPlayer,
          features: discrete_features_v2.DiscreteFeaturesV2,
          agent: QLearningAgentV5, actions: DiscreteActionsV5,
          save_metrics: bool, reward_funct):
    """
    @param num_train_episodes: number of episodes to train in this iteration
    @param num_total_train_ep: total number of episodes to train
    @param game_interface: game interface; manages the interaction between
        the agent and the HFO server;
    @param features: features interface; extracts the agent's main features
        from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param save_metrics: flag; if True, training metrics are saved;
    @param reward_funct: reward function used
    @return: (QLearningAgentV5) the agent
    """
    for ep in range(num_train_episodes):
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Start learning loop
        aux_positions_names = set()
        aux_actions_played = set()
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            action_idx = agent.act(curr_state_id)
            hfo_action_params, num_rep = actions.map_action_idx_to_hfo_action(
                agent_pos=features.get_pos_tuple(),
                has_ball=has_ball,
                action_idx=action_idx)

            # Step: repeat the low-level HFO action num_rep times
            rep_counter_aux = 0
            while game_interface.in_game() and rep_counter_aux < num_rep:
                status, observation = game_interface.step(hfo_action_params,
                                                          has_ball)
                rep_counter_aux += 1
            reward = reward_funct(status)

            # Save metrics:
            if save_metrics:
                agent.save_visited_state(curr_state_id, action_idx)
                agent.cum_reward += reward
                aux_positions_names.add(features.get_position_name())
                action_name = actions.map_action_to_str(action_idx, has_ball)
                aux_actions_played.add(action_name)

            # Update environment features:
            prev_state_id = curr_state_id
            features.update_features(observation)
            curr_state_id = features.get_state_index()

            agent.store_ep(state_idx=prev_state_id, action_idx=action_idx,
                           reward=reward, next_state_idx=curr_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
        agent.learn()
        # print(':: Episode: {}; reward: {}; epsilon: {}; positions: {}; '
        #       'actions: {}'.format(ep, agent.cum_reward, agent.epsilon,
        #                            aux_positions_names, aux_actions_played))
        if save_metrics:
            agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
        agent.update_hyper_parameters(episode=agent.train_eps,
                                      num_total_episodes=num_total_train_ep)
        # Game Reset
        game_interface.reset()
    agent.save_model()
    if save_metrics:
        actions_name = [actions.map_action_to_str(i, has_ball=True)
                        for i in range(agent.num_actions)]
        agent.export_metrics(training=True, actions_name=actions_name)
    return agent
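# ---------------------------------------------------------------------------
# Minimal, self-contained sketch of the agent interface both train() variants
# rely on (act / store_ep / learn / reset / update_hyper_parameters /
# cum_reward). It only illustrates the tabular Q-learning behaviour assumed
# above; the project's QLearningAgentV4/V5 classes also provide the metric
# and persistence methods (save_visited_state, save_metrics, save_model,
# export_metrics) omitted here, and their real implementation may differ.
# ---------------------------------------------------------------------------
import numpy as np


class TabularQAgentSketch:
    def __init__(self, num_states: int, num_actions: int,
                 learning_rate: float = 0.1, discount: float = 0.99,
                 epsilon: float = 1.0):
        self.q_table = np.zeros((num_states, num_actions))
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount = discount
        self.epsilon = epsilon
        self.cum_reward = 0
        self._episode = []

    def act(self, state_idx: int) -> int:
        # Epsilon-greedy selection over the tabular Q-values.
        if np.random.random() < self.epsilon:
            return np.random.randint(self.num_actions)
        return int(np.argmax(self.q_table[state_idx]))

    def store_ep(self, state_idx, action_idx, reward, next_state_idx,
                 has_ball, done):
        # Buffer the transition; has_ball is kept only for interface parity.
        self._episode.append((state_idx, action_idx, reward,
                              next_state_idx, done))

    def learn(self):
        # One-step Q-learning update applied over the buffered episode.
        for s, a, r, s_next, done in self._episode:
            target = r if done else (
                r + self.discount * np.max(self.q_table[s_next]))
            self.q_table[s, a] += self.learning_rate * (
                target - self.q_table[s, a])
        self._episode.clear()

    def reset(self):
        self.cum_reward = 0

    def update_hyper_parameters(self, episode: int, num_total_episodes: int):
        # Linear epsilon decay over the full training run.
        self.epsilon = max(0.05, 1.0 - episode / num_total_episodes)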