Example #1
def train(num_episodes: int, game_interface: HFOAttackingPlayer,
          features: discrete_features_v2.DiscreteFeaturesV2,
          agent: QLearningAgentV4, actions: DiscreteActionsV2, reward_funct):
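    """
    @param num_episodes: number of episodes to train;
    @param game_interface: game interface that manages the interaction with
    the HFO server;
    @param features: features interface that extracts the agent's main
    features from the observation array;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used
    """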
    for ep in range(num_episodes):
        # print('<Training> Episode {}/{}:'.format(ep, num_episodes))
        aux_positions_names = set()
        aux_actions_played = set()
        while game_interface.in_game():
            # Update environment features:
            features.update_features(game_interface.get_state())
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()
            
            # Act:
            action_idx = agent.act(curr_state_id)
            hfo_action: tuple = actions.map_action_idx_to_hfo_action(
                agent_pos=features.get_pos_tuple(), has_ball=has_ball,
                action_idx=action_idx)
            
            # Step:
            status, observation = game_interface.step(hfo_action, has_ball)
            reward = reward_funct(status)
            
            # Save metrics:
            agent.save_visited_state(curr_state_id, action_idx)
            agent.cum_reward += reward
            aux_positions_names.add(features.get_position_name())
            action_name = actions.map_action_to_str(action_idx, has_ball)
            aux_actions_played.add(action_name)
            
            # Update features with the new observation and store transition:
            prev_state_id = curr_state_id
            features.update_features(observation)
            curr_state_id = features.get_state_index()
            agent.store_ep(state_idx=prev_state_id, action_idx=action_idx,
                           reward=reward, next_state_idx=curr_state_id,
                           has_ball=has_ball, done=not game_interface.in_game())
        agent.learn()
        # print(':: Episode: {}; reward: {}; epsilon: {}; positions: {}; '
        #       'actions: {}'.format(ep, agent.cum_reward, agent.epsilon,
        #                            aux_positions_names, aux_actions_played))
        agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
        agent.update_hyper_parameters(episode=ep,
                                      num_total_episodes=num_episodes)
        # Game Reset
        game_interface.reset()
    agent.save_model()
    actions_name = [actions.map_action_to_str(i, has_ball=True)
                    for i in range(agent.num_actions)]
    agent.export_metrics(training=True, actions_name=actions_name)
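A minimal usage sketch for Example #1. The constructor calls, the num_states
attribute, and the goal_reward function below are illustrative assumptions,
not part of the source; only the train signature and agent.num_actions come
from the code above.

from hfo import GOAL  # game-status constant from the HFO Python bindings

def goal_reward(status) -> int:
    # Illustrative reward: +1 when the attacker scores, 0 otherwise.
    return 1 if status == GOAL else 0

# Hypothetical wiring; all constructor signatures are assumptions.
game_interface = HFOAttackingPlayer()
features = discrete_features_v2.DiscreteFeaturesV2()
actions = DiscreteActionsV2()
agent = QLearningAgentV4(num_states=features.num_states,
                         num_actions=actions.num_actions)
train(num_episodes=1000, game_interface=game_interface, features=features,
      agent=agent, actions=actions, reward_funct=goal_reward)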
Example #2
def train(num_train_episodes: int, num_total_train_ep: int,
          game_interface: HFOAttackingPlayer,
          features: discrete_features_v2.DiscreteFeaturesV2,
          agent: QLearningAgentV5, actions: DiscreteActionsV5,
          save_metrics: bool, reward_funct):
    """
    @param num_train_episodes: number of episodes to train in this iteration
    @param num_total_train_ep: number total of episodes to train
    @param game_interface: game interface, that manages interactions
    between both;
    @param features: features interface, from the observation array, gets
    the main features for the agent;
    @param agent: learning agent;
    @param actions: actions interface;
    @param save_metrics: flag, if true save the metrics;
    @param reward_funct: reward function used
    @return: (QLearningAgentV5) the agent
    """
    for ep in range(num_train_episodes):
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Start learning loop
        aux_positions_names = set()
        aux_actions_played = set()
        while game_interface.in_game():
            # Read the current state features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            action_idx = agent.act(curr_state_id)
            hfo_action_params, num_rep = actions.map_action_idx_to_hfo_action(
                agent_pos=features.get_pos_tuple(), has_ball=has_ball,
                action_idx=action_idx)

            # Step:
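            # Repeat the low-level action num_rep times; num_rep is assumed
            # to be >= 1, otherwise status/observation would be unbound below.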
            rep_counter_aux = 0
            while game_interface.in_game() and rep_counter_aux < num_rep:
                status, observation = game_interface.step(
                    hfo_action_params, has_ball)
                rep_counter_aux += 1
            reward = reward_funct(status)

            # Save metrics:
            if save_metrics:
                agent.save_visited_state(curr_state_id, action_idx)
                agent.cum_reward += reward
                aux_positions_names.add(features.get_position_name())
                action_name = actions.map_action_to_str(action_idx, has_ball)
                aux_actions_played.add(action_name)

            # Update features with the new observation and store transition:
            prev_state_id = curr_state_id
            features.update_features(observation)
            curr_state_id = features.get_state_index()
            agent.store_ep(state_idx=prev_state_id,
                           action_idx=action_idx,
                           reward=reward,
                           next_state_idx=curr_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
        agent.learn()
        # print(':: Episode: {}; reward: {}; epsilon: {}; positions: {}; '
        #       'actions: {}'.format(ep, agent.cum_reward, agent.epsilon,
        #                            aux_positions_names, aux_actions_played))
        if save_metrics:
            agent.save_metrics(agent.old_q_table, agent.q_table)
        # Reset player:
        agent.reset()
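        # agent.train_eps presumably counts episodes across train() calls, so
        # the decay schedule spans num_total_train_ep, not just this call: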
        agent.update_hyper_parameters(episode=agent.train_eps,
                                      num_total_episodes=num_total_train_ep)
        # Game Reset
        game_interface.reset()
    agent.save_model()
    if save_metrics:
        actions_name = [
            actions.map_action_to_str(i, has_ball=True)
            for i in range(agent.num_actions)
        ]
        agent.export_metrics(training=True, actions_name=actions_name)
    return agent
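Unlike Example #1, this version takes both the per-call episode count
(num_train_episodes) and the overall total (num_total_train_ep), and it
returns the agent, so it can be called in chunks with evaluation in between.
A sketch of such an outer loop, assuming the V5 objects are constructed
analogously to the previous sketch and reusing a goal_reward like the one
defined there; the chunk sizes are assumptions.

TOTAL_EPISODES = 5000
CHUNK = 500
for _ in range(TOTAL_EPISODES // CHUNK):
    agent = train(num_train_episodes=CHUNK,
                  num_total_train_ep=TOTAL_EPISODES,
                  game_interface=game_interface, features=features,
                  agent=agent, actions=actions,
                  save_metrics=True, reward_funct=goal_reward)
    # ...evaluation of the partially trained agent could run here...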