Example #1
def test_loss():
    batch_b = 2
    batch_t = 4
    stoch_state_dim = 3
    deter_state_dim = 4
    action_size = 3
    img_size = (3, 64, 64)  # TODO: figure out why atari games have 4 channels.

    dreamer = make_dreamer(action_size)

    # categorical action tensor
    action = torch.randint(action_size, (batch_t, batch_b))
    prev_action = torch.randn(batch_t, batch_b, action_size)
    observation = torch.randn(batch_t, batch_b, *img_size)
    env_reward = torch.randn(batch_t, batch_b, 1)
    prev_reward = torch.randn(batch_t, batch_b)
    done = torch.zeros(batch_t, batch_b, dtype=torch.bool)
    env_info = EnvInfo()
    prev_state = make_rssm_state(batch_t, batch_b, stoch_state_dim,
                                 deter_state_dim)
    agent_info = DreamerAgentInfo(prev_state=prev_state)
    agent_samples = AgentSamples(action=action,
                                 prev_action=prev_action,
                                 agent_info=agent_info)
    env_samples = EnvSamples(observation=observation,
                             reward=env_reward,
                             prev_reward=prev_reward,
                             done=done,
                             env_info=env_info)
    samples = Samples(agent=agent_samples, env=env_samples)
    loss = dreamer.loss(samples)

    # Check we have a single-element FloatTensor with a gradient
    assert isinstance(loss, torch.FloatTensor)
    assert loss.requires_grad
    assert loss.shape == ()

    # Check it still works if we pass in discrete actions
    num_actions = 6
    dreamer = make_dreamer(num_actions)
    action = torch.randint(0, num_actions, (batch_t, batch_b))
    prev_action = torch.randint(0, num_actions, (batch_t, batch_b))
    agent_samples = AgentSamples(action=action,
                                 prev_action=prev_action,
                                 agent_info=agent_info)
    env_samples = EnvSamples(observation=observation,
                             reward=env_reward,
                             prev_reward=prev_reward,
                             done=done,
                             env_info=env_info)
    samples = Samples(agent=agent_samples, env=env_samples)
    loss = dreamer.loss(samples)
    assert isinstance(loss, torch.FloatTensor)
    assert loss.requires_grad
    assert loss.shape == ()
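
The assertions above only check structural properties of the returned loss. A minimal, self-contained sketch of that property (not the Dreamer model itself, just plain PyTorch): any value computed from parameterized modules and reduced with .mean() is a 0-dim CPU FloatTensor that carries a gradient.

import torch

linear = torch.nn.Linear(4, 1)            # any module with learnable parameters
fake_inputs = torch.randn(8, 4)
loss = (linear(fake_inputs) ** 2).mean()  # reduce to a scalar

assert isinstance(loss, torch.FloatTensor)  # CPU float32 tensor
assert loss.requires_grad                   # gradient flows back to the parameters
assert loss.shape == ()                     # 0-dimensional
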
Example #2
def build_samples_buffer(agent,
                         env,
                         batch_spec,
                         bootstrap_value=False,
                         agent_shared=True,
                         env_shared=True,
                         subprocess=True,
                         examples=None):
    """Recommended to step/reset agent and env in subprocess, so it doesn't
    affect settings in master before forking workers (e.g. torch num_threads
    (MKL) may be set at first forward computation.)"""
    if examples is None:
        if subprocess:
            mgr = mp.Manager()
            examples = mgr.dict()  # Examples pickled back to master.
            w = mp.Process(target=get_example_outputs,
                           args=(agent, env, examples, subprocess))
            w.start()
            w.join()
        else:
            examples = dict()
            get_example_outputs(agent, env, examples)

    T, B = batch_spec
    all_action = buffer_from_example(examples["action"], (T + 1, B),
                                     agent_shared)
    action = all_action[1:]
    prev_action = all_action[:-1]  # Writing to action will populate prev_action.
    agent_info = buffer_from_example(examples["agent_info"], (T, B),
                                     agent_shared)
    agent_buffer = AgentSamples(
        action=action,
        prev_action=prev_action,
        agent_info=agent_info,
    )
    if bootstrap_value:
        bv = buffer_from_example(examples["agent_info"].value, (1, B),
                                 agent_shared)
        agent_buffer = AgentSamplesBsv(*agent_buffer, bootstrap_value=bv)

    observation = buffer_from_example(examples["observation"], (T, B),
                                      env_shared)
    all_reward = buffer_from_example(examples["reward"], (T + 1, B),
                                     env_shared)
    reward = all_reward[1:]
    prev_reward = all_reward[:-1]  # Writing to reward will populate prev_reward.
    done = buffer_from_example(examples["done"], (T, B), env_shared)
    env_info = buffer_from_example(examples["env_info"], (T, B), env_shared)
    env_buffer = EnvSamples(
        observation=observation,
        reward=reward,
        prev_reward=prev_reward,
        done=done,
        env_info=env_info,
    )
    samples_np = Samples(agent=agent_buffer, env=env_buffer)
    samples_pyt = torchify_buffer(samples_np)
    return samples_pyt, samples_np, examples
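
The "Writing to action will populate prev_action" comment relies on the two slices being overlapping views of the same (T + 1, B) array. A small numpy sketch of that trick (shapes are illustrative only):

import numpy as np

T, B = 4, 2
all_action = np.zeros((T + 1, B))
action, prev_action = all_action[1:], all_action[:-1]  # overlapping views, no copies

action[0] = 7.0                        # write the action taken at step t=0 ...
assert (prev_action[1] == 7.0).all()   # ... and it appears as the previous action at t=1
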
Example #3
def build_samples_buffer(agent, env, batch_spec, bootstrap_value=False,
        agent_shared=True, env_shared=True, subprocess=True, examples=None):
    """Recommended to step/reset agent and env in subprocess, so it doesn't
    affect settings in master before forking workers (e.g. torch num_threads
    (MKL) may be set at first forward computation.)"""
    if examples is None:
        if subprocess:
            mgr = mp.Manager()
            examples = mgr.dict()  # Examples pickled back to master.
            w = mp.Process(target=get_example_outputs,
                args=(agent, env, examples, subprocess))
            w.start()
            w.join()
        else:
            examples = dict()
            get_example_outputs(agent, env, examples)

    T, B = batch_spec
    all_action = buffer_from_example(examples["action"], (T + 1, B), agent_shared)
    action = all_action[1:]
    prev_action = all_action[:-1]  # Writing to action will populate prev_action.
    agent_info = buffer_from_example(examples["agent_info"], (T, B), agent_shared)
    agent_buffer = AgentSamples(
        action=action,
        prev_action=prev_action,
        agent_info=agent_info,
    )
    if bootstrap_value:        
        if agent.dual_model:
            bv = buffer_from_example(examples["agent_info"].value, (1, B), agent_shared)
            int_bv = buffer_from_example(examples["agent_info"].value, (1, B), agent_shared)
            agent_buffer = AgentSamplesBsvTwin(*agent_buffer, bootstrap_value=bv, int_bootstrap_value=int_bv)
        else:
            bv = buffer_from_example(examples["agent_info"].value, (1, B), agent_shared)
            agent_buffer = AgentSamplesBsv(*agent_buffer, bootstrap_value=bv)

    observation = buffer_from_example(examples["observation"], (T, B), env_shared) # all zero arrays (except 0th index should equal o_reset)
    next_observation = buffer_from_example(examples["observation"], (T, B), env_shared) 
    all_reward = buffer_from_example(examples["reward"], (T + 1, B), env_shared) # all zero values
    reward = all_reward[1:]
    prev_reward = all_reward[:-1]  # Writing to reward will populate prev_reward.
    done = buffer_from_example(examples["done"], (T, B), env_shared)
    env_info = buffer_from_example(examples["env_info"], (T, B), env_shared)
    env_buffer = EnvSamples(
        observation=observation,
        next_observation=next_observation,
        prev_reward=prev_reward,
        reward=reward,
        done=done,
        env_info=env_info,
    )
    samples_np = Samples(agent=agent_buffer, env=env_buffer)
    samples_pyt = torchify_buffer(samples_np) # this links the two (changes to samples_np will reflect in samples_pyt)
    return samples_pyt, samples_np, examples
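
The last comment notes that samples_np and samples_pyt are linked. A minimal sketch of that behaviour, assuming torchify_buffer wraps each leaf array with torch.from_numpy (which shares the underlying storage) rather than copying it:

import numpy as np
import torch

samples_np = np.zeros((3, 2), dtype=np.float32)
samples_pyt = torch.from_numpy(samples_np)  # shares memory with samples_np

samples_np[0, 0] = 1.0
assert samples_pyt[0, 0].item() == 1.0      # the write is visible through the torch view
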
Example #4
def build_samples_buffer(agent, env, batch_spec, bootstrap_value=False,
                         agent_shared=True, env_shared=True, subprocess=True, examples=None):
    """Recommended to step/reset agent and env in subprocess, so it doesn't
    affect settings in master before forking workers (e.g. torch num_threads
    (MKL) may be set at first forward computation.)

    :param agent: 一个Agent类的对象。
    :param env: 一个environment类的对象。
    :param batch_spec: 一个BatchSpec类的对象。
    """
    if examples is None:
        if subprocess:  # Spawn a child process.
            mgr = mp.Manager()  # Manager provides resources shared between processes.
            examples = mgr.dict()  # Examples pickled back to master; a dict the subprocess can write to.
            w = mp.Process(target=get_example_outputs,
                           args=(agent, env, examples, subprocess))  # Worker process running the target function with the given args.
            w.start()
            w.join()
        else:
            examples = dict()
            get_example_outputs(agent, env, examples)  # examples is updated in place, so nothing is returned.

    T, B = batch_spec  # Number of time steps and number of environment instances.
    all_action = buffer_from_example(examples["action"], (T + 1, B), agent_shared)
    action = all_action[1:]
    prev_action = all_action[:-1]  # Writing to action will populate prev_action.
    agent_info = buffer_from_example(examples["agent_info"], (T, B), agent_shared)
    agent_buffer = AgentSamples(
        action=action,
        prev_action=prev_action,
        agent_info=agent_info,
    )
    if bootstrap_value:
        bv = buffer_from_example(examples["agent_info"].value, (1, B), agent_shared)
        agent_buffer = AgentSamplesBsv(*agent_buffer, bootstrap_value=bv)

    observation = buffer_from_example(examples["observation"], (T, B), env_shared)
    all_reward = buffer_from_example(examples["reward"], (T + 1, B), env_shared)
    reward = all_reward[1:]
    prev_reward = all_reward[:-1]  # Writing to reward will populate prev_reward.
    done = buffer_from_example(examples["done"], (T, B), env_shared)
    env_info = buffer_from_example(examples["env_info"], (T, B), env_shared)
    env_buffer = EnvSamples(
        observation=observation,
        reward=reward,
        prev_reward=prev_reward,
        done=done,
        env_info=env_info,
    )
    samples_np = Samples(agent=agent_buffer, env=env_buffer)
    samples_pyt = torchify_buffer(samples_np)
    return samples_pyt, samples_np, examples
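
A stripped-down sketch of the subprocess pattern used when examples is None: the worker fills a Manager dict, which is sent back to the parent after join(), so the example step/reset never touches process-wide state (e.g. torch thread settings) in the master. The worker function below is a hypothetical stand-in, not the real get_example_outputs.

import multiprocessing as mp

def fill_examples(examples):
    examples["action"] = 0          # placeholder for an example agent output
    examples["observation"] = 0.0   # placeholder for an example env output

if __name__ == "__main__":
    mgr = mp.Manager()
    examples = mgr.dict()           # proxy dict shared with the child process
    w = mp.Process(target=fill_examples, args=(examples,))
    w.start()
    w.join()
    print(dict(examples))           # {'action': 0, 'observation': 0.0}
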
Example #5
def build_intrinsic_samples_buffer(agent,
                                   env,
                                   batch_spec,
                                   bootstrap_value=False,
                                   next_obs=False,
                                   agent_shared=True,
                                   env_shared=True,
                                   subprocess=True,
                                   examples=None):
    """
    Replaces ``build_samples_buffer`` to add additional buffer space for intrinsic bonus agents.
    If bootstrap_value=True, also adds space for int_bootstrap_value from intrinsic value head.
    If next_obs=True, also adds space for next observations (NOTE: This is memory intensive with
    raw pixel states, as it doubles the space to store images. Keep this as False unless the
    algorithm needs it).
    """
    if examples is None:
        if subprocess:
            mgr = mp.Manager()
            examples = mgr.dict()  # Examples pickled back to master.
            w = mp.Process(target=get_example_outputs,
                           args=(agent, env, examples, subprocess))
            w.start()
            w.join()
        else:
            examples = dict()
            get_example_outputs(agent, env, examples)

    T, B = batch_spec
    all_action = buffer_from_example(examples["action"], (T + 1, B),
                                     agent_shared)
    action = all_action[1:]
    prev_action = all_action[:-1]  # Writing to action will populate prev_action.
    agent_info = buffer_from_example(examples["agent_info"], (T, B),
                                     agent_shared)
    agent_buffer = AgentSamples(
        action=action,
        prev_action=prev_action,
        agent_info=agent_info,
    )
    if bootstrap_value:  # Added buffer space for intrinsic bootstrap value
        bv = buffer_from_example(examples["agent_info"].ext_value, (1, B),
                                 agent_shared)
        int_bv = buffer_from_example(examples["agent_info"].int_value, (1, B),
                                     agent_shared)
        agent_buffer = IntAgentSamplesBsv(*agent_buffer,
                                          bootstrap_value=bv,
                                          int_bootstrap_value=int_bv)

    observation = buffer_from_example(examples["observation"], (T, B),
                                      env_shared)
    all_reward = buffer_from_example(examples["reward"], (T + 1, B),
                                     env_shared)
    reward = all_reward[1:]
    prev_reward = all_reward[:-1]  # Writing to reward will populate prev_reward.
    done = buffer_from_example(examples["done"], (T, B), env_shared)
    env_info = buffer_from_example(examples["env_info"], (T, B), env_shared)

    if next_obs:  # Add buffer space for next obs, if specified
        next_observation = buffer_from_example(examples["observation"], (T, B),
                                               env_shared)
        env_buffer = EnvSamplesPlus(
            observation=observation,
            next_observation=next_observation,
            reward=reward,
            prev_reward=prev_reward,
            done=done,
            env_info=env_info,
        )
    else:
        env_buffer = EnvSamples(
            observation=observation,
            reward=reward,
            prev_reward=prev_reward,
            done=done,
            env_info=env_info,
        )

    samples_np = Samples(agent=agent_buffer, env=env_buffer)
    samples_pyt = torchify_buffer(samples_np)
    return samples_pyt, samples_np, examples
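
To make the docstring's memory warning concrete, a quick back-of-the-envelope check with assumed, illustrative Atari-style sizes: enabling next_obs duplicates the whole (T, B, C, H, W) image buffer.

import numpy as np

T, B, img_shape = 128, 16, (4, 84, 84)   # assumed, illustrative sizes
obs_bytes = T * B * int(np.prod(img_shape)) * np.dtype(np.uint8).itemsize
print(f"observation buffer: {obs_bytes / 2**20:.1f} MiB;"
      f" with next_obs=True: {2 * obs_bytes / 2**20:.1f} MiB")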