import numpy as np
from unityagents import UnityEnvironment

# The project-local Agent class must be importable here; the module name below
# is an assumption, adjust it to wherever the project defines Agent.
from agent import Agent


def run_single_episode(env: UnityEnvironment,
                       brain_name,
                       agent: Agent = None,
                       train_mode=False,
                       max_t=2000,
                       epsilon=0.0):
    """
    Execute a single episode

    Params
    ======
        env (UnityEnvironment): enviroment
        brain_name (string): default brain name
        agent (Agent): agent that is responsible for control the actions (if no agent, a random action is chosen)
        train_mode (bool): indicate if the environment is on the train mode
        max_t (int): max number of steps in each episode
        epsilon (float): attenuate the noise applied to the action

    Return
    ======
        scores (float): episode scores of all agents
    """
    env_info = env.reset(train_mode=train_mode)[brain_name]  # reset the environment
    num_agents = len(env_info.agents)  # number of agents acting in parallel
    action_size = env.brains[brain_name].vector_action_space_size  # size of each action

    states = env_info.vector_observations  # get the initial state (for each agent)
    scores = np.zeros(num_agents)  # initialize the score (for each agent)

    # Run all the steps of one episode
    for time_step in range(1, max_t + 1):
        if agent:  # if an agent is provided, get an action for each agent
            actions = agent.act(states, epsilon=epsilon, add_noise=train_mode)
        else:  # otherwise, sample a random action for each agent
            actions = np.random.randn(num_agents, action_size)
        actions = np.clip(actions, -1, 1)  # clip all actions to the range [-1, 1]
        env_info = env.step(actions)[brain_name]  # send the actions to the environment

        next_states = env_info.vector_observations  # get the next states
        rewards = env_info.rewards  # get the rewards
        dones = env_info.local_done  # get the done flags

        if agent and train_mode:  # let the agent learn while training
            agent.step(states, actions, rewards, next_states, dones, time_step)

        states = next_states  # roll over the states to the next time step
        scores += rewards  # accumulate the rewards (for each agent)
        if np.any(dones):  # Exit episode if done
            break

    return scores
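

# Usage sketch: run one episode of the multi-agent function above with a random
# policy and print the mean score. The environment file name ("Reacher.app") is
# an assumption; point it at the project's actual build.
if __name__ == "__main__":
    env = UnityEnvironment(file_name="Reacher.app")
    brain_name = env.brain_names[0]  # the default brain

    scores = run_single_episode(env, brain_name)  # no agent: random actions
    print("Random-policy score (mean over agents):", np.mean(scores))

    # With a learning agent it would instead look like:
    #   scores = run_single_episode(env, brain_name, agent=agent,
    #                               train_mode=True, epsilon=eps)

    env.close()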


# Single-agent, discrete-action variant of run_single_episode.
def run_single_episode(env: UnityEnvironment,
                       brain_name,
                       agent: Agent = None,
                       max_t=1000,
                       eps=0.,
                       train_mode=False):
    """
    Execute a single episode

    Params
    ======
        env (UnityEnvironment): enviroment
        brain_name (string): default brain name
        agent (Agent): agent that is responsible for control the actions (if no agent, a random action is chosen)
        max_t (int): max steps in each episode
        train_mode (bool): indicate if the environment is on the train mode

    Return
    ======
        score (float): total score of episode
    """
    env_info = env.reset(train_mode=train_mode)[brain_name]  # reset the environment
    action_size = env.brains[brain_name].vector_action_space_size  # number of discrete actions
    state = env_info.vector_observations[0]  # get the initial state

    score = 0
    for _ in range(max_t):  # Run each step of the episode
        if agent:  # if an agent is provided, let it select the action
            action = agent.act(state, eps)
        else:  # otherwise, select a random action
            action = np.random.randint(action_size)

        env_info = env.step(action)[brain_name]  # send the action to the environment

        next_state = env_info.vector_observations[0]  # get the next state
        reward = env_info.rewards[0]  # get the reward
        done = env_info.local_done[0]  # get the done flag

        if agent and train_mode:  # let the agent learn while training
            agent.step(state, action, reward, next_state, done)

        state = next_state  # roll over the state to the next time step
        score += reward  # accumulate the reward
        if done:  # Exit episode if done
            break

    return score
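

# Usage sketch: run one episode of the single-agent function above with a random
# policy and print the score. The environment file name ("Banana.app") is an
# assumption; point it at the project's actual build.
if __name__ == "__main__":
    env = UnityEnvironment(file_name="Banana.app")
    brain_name = env.brain_names[0]  # the default brain

    score = run_single_episode(env, brain_name)  # no agent: random actions
    print("Random-policy episode score:", score)

    # With a learning agent it would instead look like:
    #   score = run_single_episode(env, brain_name, agent=agent,
    #                              eps=eps, train_mode=True)

    env.close()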