import numpy as np
from unityagents import UnityEnvironment

# The project-local Agent class must be importable here; the module name below
# is an assumption, adjust it to wherever the project defines Agent.
from agent import Agent


def run_single_episode(env: UnityEnvironment,
                       brain_name,
                       agent: Agent = None,
                       train_mode=False,
                       max_t=2000,
                       epsilon=0.0):
    """
    Execute a single episode

    Params
    ======
        env (UnityEnvironment): enviroment
        brain_name (string): default brain name
        agent (Agent): agent that is responsible for control the actions (if no agent, a random action is chosen)
        train_mode (bool): indicate if the environment is on the train mode
        max_t (int): max number of steps in each episode
        epsilon (float): attenuate the noise applied to the action

    Return
    ======
        scores (float): episode scores of all agents
    """
    env_info = env.reset(train_mode=train_mode)[brain_name]  # reset the environment
    num_agents = len(env_info.agents)  # number of agents acting in parallel
    action_size = env.brains[brain_name].vector_action_space_size  # size of each action

    states = env_info.vector_observations  # get the initial state (for each agent)
    scores = np.zeros(num_agents)  # initialize the score (for each agent)

    # Run all the steps of one episode
    for time_step in range(1, max_t + 1):
        if agent:  # if an agent is provided, get an action for each agent
            actions = agent.act(states, epsilon=epsilon, add_noise=train_mode)
        else:  # otherwise, sample a random action for each agent
            actions = np.random.randn(num_agents, action_size)
        actions = np.clip(actions, -1, 1)  # clip all actions to the range [-1, 1]
        env_info = env.step(actions)[brain_name]  # send the actions to the environment

        next_states = env_info.vector_observations  # get the next states
        rewards = env_info.rewards  # get the rewards
        dones = env_info.local_done  # get the done flags

        if agent and train_mode:  # let the agent learn while training
            agent.step(states, actions, rewards, next_states, dones, time_step)

        states = next_states  # roll over the states to the next time step
        scores += rewards  # accumulate the rewards (for each agent)
        if np.any(dones):  # Exit episode if done
            break

    return scores
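

# Usage sketch: run one episode of the multi-agent function above with a random
# policy and print the mean score. The environment file name ("Reacher.app") is
# an assumption; point it at the project's actual build.
if __name__ == "__main__":
    env = UnityEnvironment(file_name="Reacher.app")
    brain_name = env.brain_names[0]  # the default brain

    scores = run_single_episode(env, brain_name)  # no agent: random actions
    print("Random-policy score (mean over agents):", np.mean(scores))

    # With a learning agent it would instead look like:
    #   scores = run_single_episode(env, brain_name, agent=agent,
    #                               train_mode=True, epsilon=eps)

    env.close()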


# Single-agent, discrete-action variant of run_single_episode.
def run_single_episode(env: UnityEnvironment,
                       brain_name,
                       agent: Agent = None,
                       max_t=1000,
                       eps=0.,
                       train_mode=False):
    """
    Execute a single episode

    Params
    ======
        env (UnityEnvironment): enviroment
        brain_name (string): default brain name
        agent (Agent): agent that is responsible for control the actions (if no agent, a random action is chosen)
        max_t (int): max steps in each episode
        train_mode (bool): indicate if the environment is on the train mode

    Return
    ======
        score (float): total score of episode
    """
    env_info = env.reset(train_mode=train_mode)[brain_name]  # reset the environment
    action_size = env.brains[brain_name].vector_action_space_size  # number of discrete actions
    state = env_info.vector_observations[0]  # get the initial state

    score = 0
    for _ in range(max_t):  # Run each step of the episode
        if agent:  # if an agent is provided, let it select the action
            action = agent.act(state, eps)
        else:  # otherwise, select a random action
            action = np.random.randint(action_size)

        env_info = env.step(action)[brain_name]  # send the action to the environment

        next_state = env_info.vector_observations[0]  # get the next state
        reward = env_info.rewards[0]  # get the reward
        done = env_info.local_done[0]  # get the done flag

        if agent and train_mode:  # let the agent learn while training
            agent.step(state, action, reward, next_state, done)

        state = next_state  # roll over the state to the next time step
        score += reward  # accumulate the reward
        if done:  # Exit episode if done
            break

    return score
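

# Usage sketch: run one episode of the single-agent function above with a random
# policy and print the score. The environment file name ("Banana.app") is an
# assumption; point it at the project's actual build.
if __name__ == "__main__":
    env = UnityEnvironment(file_name="Banana.app")
    brain_name = env.brain_names[0]  # the default brain

    score = run_single_episode(env, brain_name)  # no agent: random actions
    print("Random-policy episode score:", score)

    # With a learning agent it would instead look like:
    #   score = run_single_episode(env, brain_name, agent=agent,
    #                              eps=eps, train_mode=True)

    env.close()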